diff --git a/auto-generated/bfloat16/api-testing/vcreate.c b/auto-generated/bfloat16/api-testing/vcreate.c new file mode 100644 index 000000000..6f3316ad1 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vcreate.c @@ -0,0 +1,177 @@ +#include +#include + +vbfloat16m2_t test_vcreate_v_bf16m1_bf16m2(vbfloat16m1_t v0, vbfloat16m1_t v1) { + return __riscv_vcreate_v_bf16m1_bf16m2(v0, v1); +} + +vbfloat16m4_t test_vcreate_v_bf16m1_bf16m4(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3) { + return __riscv_vcreate_v_bf16m1_bf16m4(v0, v1, v2, v3); +} + +vbfloat16m8_t test_vcreate_v_bf16m1_bf16m8(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6, vbfloat16m1_t v7) { + return __riscv_vcreate_v_bf16m1_bf16m8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16m4_t test_vcreate_v_bf16m2_bf16m4(vbfloat16m2_t v0, vbfloat16m2_t v1) { + return __riscv_vcreate_v_bf16m2_bf16m4(v0, v1); +} + +vbfloat16m8_t test_vcreate_v_bf16m2_bf16m8(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2, vbfloat16m2_t v3) { + return __riscv_vcreate_v_bf16m2_bf16m8(v0, v1, v2, v3); +} + +vbfloat16m8_t test_vcreate_v_bf16m4_bf16m8(vbfloat16m4_t v0, vbfloat16m4_t v1) { + return __riscv_vcreate_v_bf16m4_bf16m8(v0, v1); +} + +vbfloat16mf4x2_t test_vcreate_v_bf16mf4x2(vbfloat16mf4_t v0, + vbfloat16mf4_t v1) { + return __riscv_vcreate_v_bf16mf4x2(v0, v1); +} + +vbfloat16mf4x3_t test_vcreate_v_bf16mf4x3(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2) { + return __riscv_vcreate_v_bf16mf4x3(v0, v1, v2); +} + +vbfloat16mf4x4_t test_vcreate_v_bf16mf4x4(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, + vbfloat16mf4_t v3) { + return __riscv_vcreate_v_bf16mf4x4(v0, v1, v2, v3); +} + +vbfloat16mf4x5_t test_vcreate_v_bf16mf4x5(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4) { + return __riscv_vcreate_v_bf16mf4x5(v0, v1, v2, v3, v4); +} + 
+vbfloat16mf4x6_t test_vcreate_v_bf16mf4x6(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, + vbfloat16mf4_t v5) { + return __riscv_vcreate_v_bf16mf4x6(v0, v1, v2, v3, v4, v5); +} + +vbfloat16mf4x7_t test_vcreate_v_bf16mf4x7(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, + vbfloat16mf4_t v6) { + return __riscv_vcreate_v_bf16mf4x7(v0, v1, v2, v3, v4, v5, v6); +} + +vbfloat16mf4x8_t test_vcreate_v_bf16mf4x8(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, + vbfloat16mf4_t v6, + vbfloat16mf4_t v7) { + return __riscv_vcreate_v_bf16mf4x8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16mf2x2_t test_vcreate_v_bf16mf2x2(vbfloat16mf2_t v0, + vbfloat16mf2_t v1) { + return __riscv_vcreate_v_bf16mf2x2(v0, v1); +} + +vbfloat16mf2x3_t test_vcreate_v_bf16mf2x3(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2) { + return __riscv_vcreate_v_bf16mf2x3(v0, v1, v2); +} + +vbfloat16mf2x4_t test_vcreate_v_bf16mf2x4(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, + vbfloat16mf2_t v3) { + return __riscv_vcreate_v_bf16mf2x4(v0, v1, v2, v3); +} + +vbfloat16mf2x5_t test_vcreate_v_bf16mf2x5(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4) { + return __riscv_vcreate_v_bf16mf2x5(v0, v1, v2, v3, v4); +} + +vbfloat16mf2x6_t test_vcreate_v_bf16mf2x6(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, + vbfloat16mf2_t v5) { + return __riscv_vcreate_v_bf16mf2x6(v0, v1, v2, v3, v4, v5); +} + +vbfloat16mf2x7_t test_vcreate_v_bf16mf2x7(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, + vbfloat16mf2_t v6) { + return __riscv_vcreate_v_bf16mf2x7(v0, v1, v2, v3, v4, v5, v6); +} + +vbfloat16mf2x8_t test_vcreate_v_bf16mf2x8(vbfloat16mf2_t 
v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, + vbfloat16mf2_t v6, + vbfloat16mf2_t v7) { + return __riscv_vcreate_v_bf16mf2x8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16m1x2_t test_vcreate_v_bf16m1x2(vbfloat16m1_t v0, vbfloat16m1_t v1) { + return __riscv_vcreate_v_bf16m1x2(v0, v1); +} + +vbfloat16m1x3_t test_vcreate_v_bf16m1x3(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2) { + return __riscv_vcreate_v_bf16m1x3(v0, v1, v2); +} + +vbfloat16m1x4_t test_vcreate_v_bf16m1x4(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3) { + return __riscv_vcreate_v_bf16m1x4(v0, v1, v2, v3); +} + +vbfloat16m1x5_t test_vcreate_v_bf16m1x5(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4) { + return __riscv_vcreate_v_bf16m1x5(v0, v1, v2, v3, v4); +} + +vbfloat16m1x6_t test_vcreate_v_bf16m1x6(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5) { + return __riscv_vcreate_v_bf16m1x6(v0, v1, v2, v3, v4, v5); +} + +vbfloat16m1x7_t test_vcreate_v_bf16m1x7(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6) { + return __riscv_vcreate_v_bf16m1x7(v0, v1, v2, v3, v4, v5, v6); +} + +vbfloat16m1x8_t test_vcreate_v_bf16m1x8(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6, vbfloat16m1_t v7) { + return __riscv_vcreate_v_bf16m1x8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16m2x2_t test_vcreate_v_bf16m2x2(vbfloat16m2_t v0, vbfloat16m2_t v1) { + return __riscv_vcreate_v_bf16m2x2(v0, v1); +} + +vbfloat16m2x3_t test_vcreate_v_bf16m2x3(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2) { + return __riscv_vcreate_v_bf16m2x3(v0, v1, v2); +} + +vbfloat16m2x4_t test_vcreate_v_bf16m2x4(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2, 
vbfloat16m2_t v3) { + return __riscv_vcreate_v_bf16m2x4(v0, v1, v2, v3); +} + +vbfloat16m4x2_t test_vcreate_v_bf16m4x2(vbfloat16m4_t v0, vbfloat16m4_t v1) { + return __riscv_vcreate_v_bf16m4x2(v0, v1); +} diff --git a/auto-generated/bfloat16/api-testing/vfncvtbf16.c b/auto-generated/bfloat16/api-testing/vfncvtbf16.c new file mode 100644 index 000000000..ca33a95f1 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vfncvtbf16.c @@ -0,0 +1,92 @@ +#include +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4(vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4(vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2(vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2(vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1(vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1(vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2(vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4(vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_m(vbool64_t vm, vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_m(vm, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_m(vbool32_t vm, vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_m(vm, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_m(vm, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_m(vm, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_m(vm, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm(vfloat32mf2_t vs2, size_t vl) { + 
return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm(vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm(vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t +test_vfncvtbf16_f_f_w_bf16mf4_rm_m(vbool64_t vm, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_m(vbool32_t vm, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vfwcvtbf16.c b/auto-generated/bfloat16/api-testing/vfwcvtbf16.c new file mode 100644 index 000000000..762fa909d --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vfwcvtbf16.c @@ -0,0 +1,47 @@ +#include +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2(vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2(vs2, vl); +} + 
+vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1(vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1(vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2(vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2(vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4(vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4(vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8(vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8(vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_m(vbool64_t vm, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_m(vm, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_m(vbool32_t vm, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_m(vm, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_m(vbool16_t vm, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_m(vm, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_m(vbool8_t vm, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_m(vm, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_m(vbool4_t vm, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_m(vm, vs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vfwmaccbf16.c b/auto-generated/bfloat16/api-testing/vfwmaccbf16.c new file mode 100644 index 000000000..5e48e1b89 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vfwmaccbf16.c @@ -0,0 +1,233 @@ +#include +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t 
vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return 
__riscv_vfwmaccbf16_vf_f32m1_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_m(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + 
+vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_m(vm, vd, vs1, 
vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} diff --git a/auto-generated/bfloat16/api-testing/vget.c b/auto-generated/bfloat16/api-testing/vget.c new file mode 100644 index 000000000..0eafb6875 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vget.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16m1_t test_vget_v_bf16m2_bf16m1(vbfloat16m2_t src, size_t index) { + return __riscv_vget_v_bf16m2_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m4_bf16m1(vbfloat16m4_t src, size_t index) { + return __riscv_vget_v_bf16m4_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m8_bf16m1(vbfloat16m8_t src, size_t index) { + return __riscv_vget_v_bf16m8_bf16m1(src, 0); +} + +vbfloat16m2_t 
test_vget_v_bf16m4_bf16m2(vbfloat16m4_t src, size_t index) { + return __riscv_vget_v_bf16m4_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m8_bf16m2(vbfloat16m8_t src, size_t index) { + return __riscv_vget_v_bf16m8_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m8_bf16m4(vbfloat16m8_t src, size_t index) { + return __riscv_vget_v_bf16m8_bf16m4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x2_bf16mf4(vbfloat16mf4x2_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x2_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x3_bf16mf4(vbfloat16mf4x3_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x3_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x4_bf16mf4(vbfloat16mf4x4_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x4_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x5_bf16mf4(vbfloat16mf4x5_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x5_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x6_bf16mf4(vbfloat16mf4x6_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x6_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x7_bf16mf4(vbfloat16mf4x7_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x7_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x8_bf16mf4(vbfloat16mf4x8_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x8_bf16mf4(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x2_bf16mf2(vbfloat16mf2x2_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x2_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x3_bf16mf2(vbfloat16mf2x3_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x3_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x4_bf16mf2(vbfloat16mf2x4_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x4_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x5_bf16mf2(vbfloat16mf2x5_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x5_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x6_bf16mf2(vbfloat16mf2x6_t src, 
+ size_t index) { + return __riscv_vget_v_bf16mf2x6_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x7_bf16mf2(vbfloat16mf2x7_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x7_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x8_bf16mf2(vbfloat16mf2x8_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x8_bf16mf2(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x2_bf16m1(vbfloat16m1x2_t src, size_t index) { + return __riscv_vget_v_bf16m1x2_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x3_bf16m1(vbfloat16m1x3_t src, size_t index) { + return __riscv_vget_v_bf16m1x3_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x4_bf16m1(vbfloat16m1x4_t src, size_t index) { + return __riscv_vget_v_bf16m1x4_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x5_bf16m1(vbfloat16m1x5_t src, size_t index) { + return __riscv_vget_v_bf16m1x5_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x6_bf16m1(vbfloat16m1x6_t src, size_t index) { + return __riscv_vget_v_bf16m1x6_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x7_bf16m1(vbfloat16m1x7_t src, size_t index) { + return __riscv_vget_v_bf16m1x7_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x8_bf16m1(vbfloat16m1x8_t src, size_t index) { + return __riscv_vget_v_bf16m1x8_bf16m1(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x2_bf16m2(vbfloat16m2x2_t src, size_t index) { + return __riscv_vget_v_bf16m2x2_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x3_bf16m2(vbfloat16m2x3_t src, size_t index) { + return __riscv_vget_v_bf16m2x3_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x4_bf16m2(vbfloat16m2x4_t src, size_t index) { + return __riscv_vget_v_bf16m2x4_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m4x2_bf16m4(vbfloat16m4x2_t src, size_t index) { + return __riscv_vget_v_bf16m4x2_bf16m4(src, 0); +} diff --git a/auto-generated/bfloat16/api-testing/vle16.c b/auto-generated/bfloat16/api-testing/vle16.c new file mode 100644 index 000000000..ba320a6cb --- /dev/null +++ 
b/auto-generated/bfloat16/api-testing/vle16.c @@ -0,0 +1,53 @@ +#include +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4(rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2(rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1(rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2(rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4(rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8(rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16mf4_m(vm, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16mf2_m(vm, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16m1_m(vm, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_m(vm, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_m(vm, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vle16ff.c b/auto-generated/bfloat16/api-testing/vle16ff.c new file mode 100644 index 000000000..f8d37c7dd --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vle16ff.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf4(rs1, new_vl, vl); 
+} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf2(rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m1(rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m2(rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m4(rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m8(rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf4_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m1_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m4_m(vm, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m8_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlmul_ext_v.c b/auto-generated/bfloat16/api-testing/vlmul_ext_v.c new file mode 100644 index 000000000..75285f967 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlmul_ext_v.c @@ -0,0 +1,62 @@ +#include +#include + 
+vbfloat16mf2_t test_vlmul_ext_v_bf16mf4_bf16mf2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf4_bf16m1(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf4_bf16m2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf4_bf16m4(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf4_bf16m8(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m8(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf2_bf16m1(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf2_bf16m2(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf2_bf16m4(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf2_bf16m8(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m8(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16m1_bf16m2(vbfloat16m1_t value) { + return __riscv_vlmul_ext_v_bf16m1_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m1_bf16m4(vbfloat16m1_t value) { + return __riscv_vlmul_ext_v_bf16m1_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m1_bf16m8(vbfloat16m1_t value) { + return __riscv_vlmul_ext_v_bf16m1_bf16m8(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m2_bf16m4(vbfloat16m2_t value) { + return __riscv_vlmul_ext_v_bf16m2_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m2_bf16m8(vbfloat16m2_t value) { + return __riscv_vlmul_ext_v_bf16m2_bf16m8(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m4_bf16m8(vbfloat16m4_t value) { + return __riscv_vlmul_ext_v_bf16m4_bf16m8(value); +} diff --git a/auto-generated/bfloat16/api-testing/vlmul_trunc_v.c 
b/auto-generated/bfloat16/api-testing/vlmul_trunc_v.c new file mode 100644 index 000000000..97495502a --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlmul_trunc_v.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vlmul_trunc_v_bf16mf2_bf16mf4(vbfloat16mf2_t value) { + return __riscv_vlmul_trunc_v_bf16mf2_bf16mf4(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m1_bf16mf4(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_v_bf16m1_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m1_bf16mf2(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_v_bf16m1_bf16mf2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m2_bf16mf4(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_v_bf16m2_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m2_bf16mf2(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_v_bf16m2_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m2_bf16m1(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_v_bf16m2_bf16m1(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m4_bf16mf4(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m4_bf16mf2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m4_bf16m1(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_trunc_v_bf16m4_bf16m2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16m2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m8_bf16mf4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m8_bf16mf2(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m8_bf16m1(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_trunc_v_bf16m8_bf16m2(vbfloat16m8_t value) { + return 
__riscv_vlmul_trunc_v_bf16m8_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_trunc_v_bf16m8_bf16m4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16m4(value); +} diff --git a/auto-generated/bfloat16/api-testing/vloxei16.c b/auto-generated/bfloat16/api-testing/vloxei16.c new file mode 100644 index 000000000..86b076156 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxei16.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vloxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf4(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf2(rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m1(rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m2(rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m4(rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m8(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m1_m(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4_t 
test_vloxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m4_m(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vloxseg2ei16.c b/auto-generated/bfloat16/api-testing/vloxseg2ei16.c new file mode 100644 index 000000000..1ee5de8c9 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxseg2ei16.c @@ -0,0 +1,54 @@ +#include +#include + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_m(vbool8_t vm, 
const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vloxseg3ei16.c b/auto-generated/bfloat16/api-testing/vloxseg3ei16.c new file mode 100644 index 000000000..0f8f21676 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/api-testing/vloxseg4ei16.c b/auto-generated/bfloat16/api-testing/vloxseg4ei16.c new file mode 100644 index 000000000..535f74024 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vloxseg5ei16.c b/auto-generated/bfloat16/api-testing/vloxseg5ei16.c new file mode 100644 index 000000000..294b40dee --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, 
size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vloxseg6ei16.c b/auto-generated/bfloat16/api-testing/vloxseg6ei16.c new file mode 100644 index 000000000..17c579abf --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t 
vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vloxseg7ei16.c b/auto-generated/bfloat16/api-testing/vloxseg7ei16.c new file mode 100644 index 000000000..f0e04f0f8 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vloxseg8ei16.c b/auto-generated/bfloat16/api-testing/vloxseg8ei16.c new file mode 100644 index 000000000..19a53eadd --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vloxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8(rs1, rs2, vl); +} + 
+vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlse16.c b/auto-generated/bfloat16/api-testing/vlse16.c new file mode 100644 index 000000000..6ec4a53a8 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlse16.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf4(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf2(rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m1(rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m2(rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m4(rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m8(rs1, rs2, vl); +} + +vbfloat16mf4_t 
test_vlse16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m1_m(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m4_m(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg2e16.c b/auto-generated/bfloat16/api-testing/vlseg2e16.c new file mode 100644 index 000000000..1db59e83f --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg2e16.c @@ -0,0 +1,47 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2(rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2(rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2(rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2(rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2(rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_m(vm, 
rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_m(vm, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_m(vm, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_m(vm, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg2e16ff.c b/auto-generated/bfloat16/api-testing/vlseg2e16ff.c new file mode 100644 index 000000000..cd0e7e381 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg2e16ff.c @@ -0,0 +1,52 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2(rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2(rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2(rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2(rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2(rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg2e16ff_v_bf16mf2x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg3e16.c b/auto-generated/bfloat16/api-testing/vlseg3e16.c new file mode 100644 index 000000000..52e98dcc0 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg3e16.c @@ -0,0 +1,38 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3(rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3(rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3(rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3(rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_m(vm, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_m(vm, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_m(vm, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_m(vm, rs1, vl); +} diff 
--git a/auto-generated/bfloat16/api-testing/vlseg3e16ff.c b/auto-generated/bfloat16/api-testing/vlseg3e16ff.c new file mode 100644 index 000000000..623bb8f18 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg3e16ff.c @@ -0,0 +1,42 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3(rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3(rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3(rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3(rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg4e16.c b/auto-generated/bfloat16/api-testing/vlseg4e16.c new file mode 100644 index 000000000..b0d4a9411 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg4e16.c @@ -0,0 +1,38 @@ +#include +#include + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4(const __bf16 *rs1, size_t vl) { + return 
__riscv_vlseg4e16_v_bf16mf4x4(rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4(rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4(rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4(rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_m(vm, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_m(vm, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_m(vm, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg4e16ff.c b/auto-generated/bfloat16/api-testing/vlseg4e16ff.c new file mode 100644 index 000000000..7e76bc96a --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg4e16ff.c @@ -0,0 +1,42 @@ +#include +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4(rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4(rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4(rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4(rs1, new_vl, vl); +} + +vbfloat16mf4x4_t 
test_vlseg4e16ff_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg5e16.c b/auto-generated/bfloat16/api-testing/vlseg5e16.c new file mode 100644 index 000000000..a36ca8401 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg5e16.c @@ -0,0 +1,29 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5(rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5(rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5(rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_m(vm, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_m(vm, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg5e16ff.c b/auto-generated/bfloat16/api-testing/vlseg5e16ff.c new file mode 100644 index 000000000..ae2f49900 --- /dev/null 
+++ b/auto-generated/bfloat16/api-testing/vlseg5e16ff.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5(rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5(rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5(rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg6e16.c b/auto-generated/bfloat16/api-testing/vlseg6e16.c new file mode 100644 index 000000000..fc96aabaf --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg6e16.c @@ -0,0 +1,29 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6(rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6(rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6(rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_m(vm, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_m(vbool32_t vm, const 
__bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_m(vm, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg6e16ff.c b/auto-generated/bfloat16/api-testing/vlseg6e16ff.c new file mode 100644 index 000000000..600f39ed0 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg6e16ff.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6(rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6(rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6(rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg7e16.c b/auto-generated/bfloat16/api-testing/vlseg7e16.c new file mode 100644 index 000000000..530d67b29 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg7e16.c @@ -0,0 +1,29 @@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7(rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7(const __bf16 *rs1, 
size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7(rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7(rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_m(vm, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_m(vm, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg7e16ff.c b/auto-generated/bfloat16/api-testing/vlseg7e16ff.c new file mode 100644 index 000000000..918c59ae5 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg7e16ff.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7(rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7(rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7(rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_m(vm, rs1, new_vl, vl); +} diff --git 
a/auto-generated/bfloat16/api-testing/vlseg8e16.c b/auto-generated/bfloat16/api-testing/vlseg8e16.c new file mode 100644 index 000000000..4a3576db4 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg8e16.c @@ -0,0 +1,29 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8(rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8(rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8(rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_m(vm, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_m(vm, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlseg8e16ff.c b/auto-generated/bfloat16/api-testing/vlseg8e16ff.c new file mode 100644 index 000000000..16d539e22 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlseg8e16ff.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8(rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8(rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8(rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg8e16ff_v_bf16mf4x8_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlsseg2e16.c b/auto-generated/bfloat16/api-testing/vlsseg2e16.c new file mode 100644 index 000000000..444299755 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlsseg2e16.c @@ -0,0 +1,52 @@ +#include +#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + 
+vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlsseg3e16.c b/auto-generated/bfloat16/api-testing/vlsseg3e16.c new file mode 100644 index 000000000..02b38c6ea --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlsseg3e16.c @@ -0,0 +1,42 @@ +#include +#include + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlsseg4e16.c 
b/auto-generated/bfloat16/api-testing/vlsseg4e16.c new file mode 100644 index 000000000..629326dc1 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlsseg4e16.c @@ -0,0 +1,42 @@ +#include +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlsseg5e16.c b/auto-generated/bfloat16/api-testing/vlsseg5e16.c new file mode 100644 index 000000000..82f62d786 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlsseg5e16.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5(const 
__bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlsseg6e16.c b/auto-generated/bfloat16/api-testing/vlsseg6e16.c new file mode 100644 index 000000000..aa9e7083a --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlsseg6e16.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg6e16_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlsseg7e16.c b/auto-generated/bfloat16/api-testing/vlsseg7e16.c new file mode 100644 index 000000000..01b6fd2d8 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlsseg7e16.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vlsseg8e16.c b/auto-generated/bfloat16/api-testing/vlsseg8e16.c new file mode 100644 index 000000000..65b6e157e --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vlsseg8e16.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return 
__riscv_vlsseg8e16_v_bf16m1x8(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vluxei16.c b/auto-generated/bfloat16/api-testing/vluxei16.c new file mode 100644 index 000000000..47f978c37 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxei16.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vluxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf4(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf2(rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m1(rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m2(rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m4(rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m8(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return 
__riscv_vluxei16_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m1_m(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m4_m(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vluxseg2ei16.c b/auto-generated/bfloat16/api-testing/vluxseg2ei16.c new file mode 100644 index 000000000..67ab0184d --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxseg2ei16.c @@ -0,0 +1,54 @@ +#include +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t 
test_vluxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vluxseg3ei16.c b/auto-generated/bfloat16/api-testing/vluxseg3ei16.c new file mode 100644 index 000000000..3f43a614d --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + 
+vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vluxseg4ei16.c b/auto-generated/bfloat16/api-testing/vluxseg4ei16.c new file mode 100644 index 000000000..942ccef90 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_m(vm, rs1, rs2, vl); 
+} diff --git a/auto-generated/bfloat16/api-testing/vluxseg5ei16.c b/auto-generated/bfloat16/api-testing/vluxseg5ei16.c new file mode 100644 index 000000000..81f396ba6 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vluxseg6ei16.c b/auto-generated/bfloat16/api-testing/vluxseg6ei16.c new file mode 100644 index 000000000..6f0aaa56b --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t 
vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vluxseg7ei16.c b/auto-generated/bfloat16/api-testing/vluxseg7ei16.c new file mode 100644 index 000000000..dd1c46108 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vluxseg8ei16.c 
b/auto-generated/bfloat16/api-testing/vluxseg8ei16.c new file mode 100644 index 000000000..ea3d2be1e --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vluxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vreinterpret.c b/auto-generated/bfloat16/api-testing/vreinterpret.c new file mode 100644 index 000000000..44975a392 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vreinterpret.c @@ -0,0 +1,98 @@ +#include +#include + +vbfloat16mf4_t test_vreinterpret_v_i16mf4_bf16mf4(vint16mf4_t src) { + return __riscv_vreinterpret_v_i16mf4_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_i16mf2_bf16mf2(vint16mf2_t src) { + return __riscv_vreinterpret_v_i16mf2_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_i16m1_bf16m1(vint16m1_t src) { + return __riscv_vreinterpret_v_i16m1_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_i16m2_bf16m2(vint16m2_t src) { + return 
__riscv_vreinterpret_v_i16m2_bf16m2(src); +} + +vbfloat16m4_t test_vreinterpret_v_i16m4_bf16m4(vint16m4_t src) { + return __riscv_vreinterpret_v_i16m4_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_i16m8_bf16m8(vint16m8_t src) { + return __riscv_vreinterpret_v_i16m8_bf16m8(src); +} + +vbfloat16mf4_t test_vreinterpret_v_u16mf4_bf16mf4(vuint16mf4_t src) { + return __riscv_vreinterpret_v_u16mf4_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_u16mf2_bf16mf2(vuint16mf2_t src) { + return __riscv_vreinterpret_v_u16mf2_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_u16m1_bf16m1(vuint16m1_t src) { + return __riscv_vreinterpret_v_u16m1_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_u16m2_bf16m2(vuint16m2_t src) { + return __riscv_vreinterpret_v_u16m2_bf16m2(src); +} + +vbfloat16m4_t test_vreinterpret_v_u16m4_bf16m4(vuint16m4_t src) { + return __riscv_vreinterpret_v_u16m4_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_u16m8_bf16m8(vuint16m8_t src) { + return __riscv_vreinterpret_v_u16m8_bf16m8(src); +} + +vint16mf4_t test_vreinterpret_v_bf16mf4_i16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_v_bf16mf4_i16mf4(src); +} + +vint16mf2_t test_vreinterpret_v_bf16mf2_i16mf2(vbfloat16mf2_t src) { + return __riscv_vreinterpret_v_bf16mf2_i16mf2(src); +} + +vint16m1_t test_vreinterpret_v_bf16m1_i16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_v_bf16m1_i16m1(src); +} + +vint16m2_t test_vreinterpret_v_bf16m2_i16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_v_bf16m2_i16m2(src); +} + +vint16m4_t test_vreinterpret_v_bf16m4_i16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_v_bf16m4_i16m4(src); +} + +vint16m8_t test_vreinterpret_v_bf16m8_i16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_v_bf16m8_i16m8(src); +} + +vuint16mf4_t test_vreinterpret_v_bf16mf4_u16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_v_bf16mf4_u16mf4(src); +} + +vuint16mf2_t test_vreinterpret_v_bf16mf2_u16mf2(vbfloat16mf2_t src) { + return 
__riscv_vreinterpret_v_bf16mf2_u16mf2(src); +} + +vuint16m1_t test_vreinterpret_v_bf16m1_u16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_v_bf16m1_u16m1(src); +} + +vuint16m2_t test_vreinterpret_v_bf16m2_u16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_v_bf16m2_u16m2(src); +} + +vuint16m4_t test_vreinterpret_v_bf16m4_u16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_v_bf16m4_u16m4(src); +} + +vuint16m8_t test_vreinterpret_v_bf16m8_u16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_v_bf16m8_u16m8(src); +} diff --git a/auto-generated/bfloat16/api-testing/vse16.c b/auto-generated/bfloat16/api-testing/vse16.c new file mode 100644 index 000000000..fa8c4d20f --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vse16.c @@ -0,0 +1,56 @@ +#include +#include + +void test_vse16_v_bf16mf4(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vse16_v_bf16mf4(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf2(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vse16_v_bf16mf2(rs1, vs3, vl); +} + +void test_vse16_v_bf16m1(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m1(rs1, vs3, vl); +} + +void test_vse16_v_bf16m2(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m2(rs1, vs3, vl); +} + +void test_vse16_v_bf16m4(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m4(rs1, vs3, vl); +} + +void test_vse16_v_bf16m8(__bf16 *rs1, vbfloat16m8_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m8(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16mf4_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16mf2_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m1_m(vm, rs1, vs3, vl); +} + +void 
test_vse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m2_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m4_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m8_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vset.c b/auto-generated/bfloat16/api-testing/vset.c new file mode 100644 index 000000000..df82323a4 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vset.c @@ -0,0 +1,171 @@ +#include +#include + +vbfloat16m2_t test_vset_v_bf16m1_bf16m2(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m2(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m1_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m4(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m2_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m4(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m1_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m8(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m2_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m8(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m4_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset_v_bf16m4_bf16m8(dest, 0, value); +} + +vbfloat16mf4x2_t test_vset_v_bf16mf4_bf16mf4x2(vbfloat16mf4x2_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x2(dest, 0, value); +} + +vbfloat16mf4x3_t test_vset_v_bf16mf4_bf16mf4x3(vbfloat16mf4x3_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x3(dest, 0, value); +} + 
+vbfloat16mf4x4_t test_vset_v_bf16mf4_bf16mf4x4(vbfloat16mf4x4_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x4(dest, 0, value); +} + +vbfloat16mf4x5_t test_vset_v_bf16mf4_bf16mf4x5(vbfloat16mf4x5_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x5(dest, 0, value); +} + +vbfloat16mf4x6_t test_vset_v_bf16mf4_bf16mf4x6(vbfloat16mf4x6_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x6(dest, 0, value); +} + +vbfloat16mf4x7_t test_vset_v_bf16mf4_bf16mf4x7(vbfloat16mf4x7_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x7(dest, 0, value); +} + +vbfloat16mf4x8_t test_vset_v_bf16mf4_bf16mf4x8(vbfloat16mf4x8_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x8(dest, 0, value); +} + +vbfloat16mf2x2_t test_vset_v_bf16mf2_bf16mf2x2(vbfloat16mf2x2_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x2(dest, 0, value); +} + +vbfloat16mf2x3_t test_vset_v_bf16mf2_bf16mf2x3(vbfloat16mf2x3_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x3(dest, 0, value); +} + +vbfloat16mf2x4_t test_vset_v_bf16mf2_bf16mf2x4(vbfloat16mf2x4_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x4(dest, 0, value); +} + +vbfloat16mf2x5_t test_vset_v_bf16mf2_bf16mf2x5(vbfloat16mf2x5_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x5(dest, 0, value); +} + +vbfloat16mf2x6_t test_vset_v_bf16mf2_bf16mf2x6(vbfloat16mf2x6_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x6(dest, 0, value); +} + +vbfloat16mf2x7_t test_vset_v_bf16mf2_bf16mf2x7(vbfloat16mf2x7_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x7(dest, 0, value); +} + +vbfloat16mf2x8_t 
test_vset_v_bf16mf2_bf16mf2x8(vbfloat16mf2x8_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x8(dest, 0, value); +} + +vbfloat16m1x2_t test_vset_v_bf16m1_bf16m1x2(vbfloat16m1x2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x2(dest, 0, value); +} + +vbfloat16m1x3_t test_vset_v_bf16m1_bf16m1x3(vbfloat16m1x3_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x3(dest, 0, value); +} + +vbfloat16m1x4_t test_vset_v_bf16m1_bf16m1x4(vbfloat16m1x4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x4(dest, 0, value); +} + +vbfloat16m1x5_t test_vset_v_bf16m1_bf16m1x5(vbfloat16m1x5_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x5(dest, 0, value); +} + +vbfloat16m1x6_t test_vset_v_bf16m1_bf16m1x6(vbfloat16m1x6_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x6(dest, 0, value); +} + +vbfloat16m1x7_t test_vset_v_bf16m1_bf16m1x7(vbfloat16m1x7_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x7(dest, 0, value); +} + +vbfloat16m1x8_t test_vset_v_bf16m1_bf16m1x8(vbfloat16m1x8_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x8(dest, 0, value); +} + +vbfloat16m2x2_t test_vset_v_bf16m2_bf16m2x2(vbfloat16m2x2_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m2x2(dest, 0, value); +} + +vbfloat16m2x3_t test_vset_v_bf16m2_bf16m2x3(vbfloat16m2x3_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m2x3(dest, 0, value); +} + +vbfloat16m2x4_t test_vset_v_bf16m2_bf16m2x4(vbfloat16m2x4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m2x4(dest, 0, value); +} + +vbfloat16m4x2_t test_vset_v_bf16m4_bf16m4x2(vbfloat16m4x2_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset_v_bf16m4_bf16m4x2(dest, 0, 
value); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxei16.c b/auto-generated/bfloat16/api-testing/vsoxei16.c new file mode 100644 index 000000000..730d0d479 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsoxei16.c @@ -0,0 +1,62 @@ +#include +#include + +void test_vsoxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16mf4(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16mf2(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m1(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m2(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m4(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m8(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t 
vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxseg2ei16.c b/auto-generated/bfloat16/api-testing/vsoxseg2ei16.c new file mode 100644 index 000000000..4a8bf8606 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsoxseg2ei16.c @@ -0,0 +1,54 @@ +#include +#include + +void test_vsoxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m2x2_m(vbool8_t 
vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxseg3ei16.c b/auto-generated/bfloat16/api-testing/vsoxseg3ei16.c new file mode 100644 index 000000000..8eef4e9a2 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsoxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsoxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return 
__riscv_vsoxseg3ei16_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxseg4ei16.c b/auto-generated/bfloat16/api-testing/vsoxseg4ei16.c new file mode 100644 index 000000000..f06ecf271 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsoxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsoxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxseg5ei16.c b/auto-generated/bfloat16/api-testing/vsoxseg5ei16.c new file mode 100644 index 000000000..6f1d6f6ec --- /dev/null +++ 
b/auto-generated/bfloat16/api-testing/vsoxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxseg6ei16.c b/auto-generated/bfloat16/api-testing/vsoxseg6ei16.c new file mode 100644 index 000000000..50fca1660 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsoxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +void 
test_vsoxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxseg7ei16.c b/auto-generated/bfloat16/api-testing/vsoxseg7ei16.c new file mode 100644 index 000000000..cff1eb034 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsoxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsoxseg8ei16.c 
b/auto-generated/bfloat16/api-testing/vsoxseg8ei16.c new file mode 100644 index 000000000..3dd02854a --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsoxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsse16.c b/auto-generated/bfloat16/api-testing/vsse16.c new file mode 100644 index 000000000..0ad6a14bf --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsse16.c @@ -0,0 +1,62 @@ +#include +#include + +void test_vsse16_v_bf16mf4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16mf4(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16mf2(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m1(rs1, rs2, vs3, vl); +} 
+ +void test_vsse16_v_bf16m2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m2(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m4(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m8(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsseg2e16.c b/auto-generated/bfloat16/api-testing/vsseg2e16.c new file mode 100644 index 000000000..868ca48a1 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsseg2e16.c @@ -0,0 +1,47 @@ +#include +#include + +void test_vsseg2e16_v_bf16mf4x2(__bf16 *rs1, vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf4x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf2x2(__bf16 *rs1, vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf2x2(rs1, vs3, vl); 
+} + +void test_vsseg2e16_v_bf16m1x2(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16m1x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2(__bf16 *rs1, vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16m2x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16m4x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf4x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf2x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16_v_bf16m1x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16_v_bf16m2x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16_v_bf16m4x2_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsseg3e16.c b/auto-generated/bfloat16/api-testing/vsseg3e16.c new file mode 100644 index 000000000..2859813e2 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsseg3e16.c @@ -0,0 +1,38 @@ +#include +#include + +void test_vsseg3e16_v_bf16mf4x3(__bf16 *rs1, vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf4x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3(__bf16 *rs1, vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf2x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3(__bf16 *rs1, vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16m1x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl) { + return 
__riscv_vsseg3e16_v_bf16m2x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf4x3_m(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf2x3_m(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16_v_bf16m1x3_m(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16_v_bf16m2x3_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsseg4e16.c b/auto-generated/bfloat16/api-testing/vsseg4e16.c new file mode 100644 index 000000000..41132b932 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsseg4e16.c @@ -0,0 +1,38 @@ +#include +#include + +void test_vsseg4e16_v_bf16mf4x4(__bf16 *rs1, vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf4x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4(__bf16 *rs1, vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf2x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16m1x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m2x4(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16m2x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf4x4_m(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf2x4_m(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16_v_bf16m1x4_m(vm, rs1, vs3, vl); 
+} + +void test_vsseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16_v_bf16m2x4_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsseg5e16.c b/auto-generated/bfloat16/api-testing/vsseg5e16.c new file mode 100644 index 000000000..e09575ab0 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsseg5e16.c @@ -0,0 +1,29 @@ +#include +#include + +void test_vsseg5e16_v_bf16mf4x5(__bf16 *rs1, vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf4x5(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5(__bf16 *rs1, vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf2x5(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16m1x5(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16m1x5(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf4x5_m(vm, rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf2x5_m(vm, rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vsseg5e16_v_bf16m1x5_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsseg6e16.c b/auto-generated/bfloat16/api-testing/vsseg6e16.c new file mode 100644 index 000000000..5da413ae0 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsseg6e16.c @@ -0,0 +1,29 @@ +#include +#include + +void test_vsseg6e16_v_bf16mf4x6(__bf16 *rs1, vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16mf4x6(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6(__bf16 *rs1, vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16mf2x6(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl) { + return 
__riscv_vsseg6e16_v_bf16m1x6(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16mf4x6_m(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16mf2x6_m(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vsseg6e16_v_bf16m1x6_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsseg7e16.c b/auto-generated/bfloat16/api-testing/vsseg7e16.c new file mode 100644 index 000000000..c0674806e --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsseg7e16.c @@ -0,0 +1,29 @@ +#include +#include + +void test_vsseg7e16_v_bf16mf4x7(__bf16 *rs1, vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16mf4x7(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7(__bf16 *rs1, vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16mf2x7(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16m1x7(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16mf4x7_m(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16mf2x7_m(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vsseg7e16_v_bf16m1x7_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsseg8e16.c b/auto-generated/bfloat16/api-testing/vsseg8e16.c new file mode 100644 index 000000000..b508667c5 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsseg8e16.c @@ -0,0 +1,29 @@ +#include +#include + +void 
test_vsseg8e16_v_bf16mf4x8(__bf16 *rs1, vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf4x8(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8(__bf16 *rs1, vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf2x8(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16m1x8(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf4x8_m(vm, rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf2x8_m(vm, rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vsseg8e16_v_bf16m1x8_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vssseg2e16.c b/auto-generated/bfloat16/api-testing/vssseg2e16.c new file mode 100644 index 000000000..4befac177 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vssseg2e16.c @@ -0,0 +1,52 @@ +#include +#include + +void test_vssseg2e16_v_bf16mf4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf4x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf2x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16_v_bf16m1x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16_v_bf16m2x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16_v_bf16m4x2(rs1, rs2, vs3, vl); +} + +void 
test_vssseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf4x2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf2x2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16m1x2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16m2x2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16m4x2_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vssseg3e16.c b/auto-generated/bfloat16/api-testing/vssseg3e16.c new file mode 100644 index 000000000..329ef56ea --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vssseg3e16.c @@ -0,0 +1,42 @@ +#include +#include + +void test_vssseg3e16_v_bf16mf4x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf4x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf2x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16_v_bf16m1x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16_v_bf16m2x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf4x3_m(vm, rs1, 
rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf2x3_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16m1x3_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16m2x3_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vssseg4e16.c b/auto-generated/bfloat16/api-testing/vssseg4e16.c new file mode 100644 index 000000000..91646e642 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vssseg4e16.c @@ -0,0 +1,42 @@ +#include +#include + +void test_vssseg4e16_v_bf16mf4x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf4x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf2x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16_v_bf16m1x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16_v_bf16m2x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf4x4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf2x4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl) { + return 
__riscv_vssseg4e16_v_bf16m1x4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16m2x4_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vssseg5e16.c b/auto-generated/bfloat16/api-testing/vssseg5e16.c new file mode 100644 index 000000000..a1e4430d3 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vssseg5e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg5e16_v_bf16mf4x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf4x5(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf2x5(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vssseg5e16_v_bf16m1x5(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf4x5_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf2x5_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16m1x5_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vssseg6e16.c b/auto-generated/bfloat16/api-testing/vssseg6e16.c new file mode 100644 index 000000000..1f807f889 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vssseg6e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg6e16_v_bf16mf4x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf4x6(rs1, rs2, vs3, vl); +} + +void 
test_vssseg6e16_v_bf16mf2x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf2x6(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vssseg6e16_v_bf16m1x6(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf4x6_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf2x6_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16m1x6_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vssseg7e16.c b/auto-generated/bfloat16/api-testing/vssseg7e16.c new file mode 100644 index 000000000..0ac2db471 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vssseg7e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg7e16_v_bf16mf4x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf4x7(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf2x7(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16m1x7(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vssseg7e16_v_bf16m1x7(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf4x7_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf2x7_m(vm, rs1, rs2, vs3, vl); +} 
+ +void test_vssseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16m1x7_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vssseg8e16.c b/auto-generated/bfloat16/api-testing/vssseg8e16.c new file mode 100644 index 000000000..864344540 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vssseg8e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg8e16_v_bf16mf4x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf4x8(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf2x8(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vssseg8e16_v_bf16m1x8(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf4x8_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf2x8_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16m1x8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsuxei16.c b/auto-generated/bfloat16/api-testing/vsuxei16.c new file mode 100644 index 000000000..440ee93fe --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsuxei16.c @@ -0,0 +1,62 @@ +#include +#include + +void test_vsuxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16mf4(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + 
size_t vl) { + return __riscv_vsuxei16_v_bf16mf2(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m1(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m2(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m4(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m8(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsuxseg2ei16.c b/auto-generated/bfloat16/api-testing/vsuxseg2ei16.c new file mode 100644 index 000000000..03827f92a --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsuxseg2ei16.c @@ -0,0 +1,54 @@ 
+#include +#include + +void test_vsuxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsuxseg3ei16.c b/auto-generated/bfloat16/api-testing/vsuxseg3ei16.c new file mode 100644 index 000000000..4e3698506 --- /dev/null 
+++ b/auto-generated/bfloat16/api-testing/vsuxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsuxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsuxseg4ei16.c b/auto-generated/bfloat16/api-testing/vsuxseg4ei16.c new file mode 100644 index 000000000..fda4e5e7e --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsuxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsuxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +void 
test_vsuxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsuxseg5ei16.c b/auto-generated/bfloat16/api-testing/vsuxseg5ei16.c new file mode 100644 index 000000000..07689a012 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsuxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return 
__riscv_vsuxseg5ei16_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsuxseg6ei16.c b/auto-generated/bfloat16/api-testing/vsuxseg6ei16.c new file mode 100644 index 000000000..8df400e67 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsuxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff 
--git a/auto-generated/bfloat16/api-testing/vsuxseg7ei16.c b/auto-generated/bfloat16/api-testing/vsuxseg7ei16.c new file mode 100644 index 000000000..b2408d17e --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsuxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vsuxseg8ei16.c b/auto-generated/bfloat16/api-testing/vsuxseg8ei16.c new file mode 100644 index 000000000..195aa60b8 --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vsuxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +void 
test_vsuxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/api-testing/vundefined.c b/auto-generated/bfloat16/api-testing/vundefined.c new file mode 100644 index 000000000..13a91ae9d --- /dev/null +++ b/auto-generated/bfloat16/api-testing/vundefined.c @@ -0,0 +1,118 @@ +#include +#include + +vbfloat16mf4_t test_vundefined_bf16mf4() { + return __riscv_vundefined_bf16mf4(); +} + +vbfloat16mf2_t test_vundefined_bf16mf2() { + return __riscv_vundefined_bf16mf2(); +} + +vbfloat16m1_t test_vundefined_bf16m1() { return __riscv_vundefined_bf16m1(); } + +vbfloat16m2_t test_vundefined_bf16m2() { return __riscv_vundefined_bf16m2(); } + +vbfloat16m4_t test_vundefined_bf16m4() { return __riscv_vundefined_bf16m4(); } + +vbfloat16m8_t test_vundefined_bf16m8() { return __riscv_vundefined_bf16m8(); } + +vbfloat16mf4x2_t test_vundefined_bf16mf4x2() { + return __riscv_vundefined_bf16mf4x2(); +} + +vbfloat16mf4x3_t test_vundefined_bf16mf4x3() { + return __riscv_vundefined_bf16mf4x3(); +} + +vbfloat16mf4x4_t test_vundefined_bf16mf4x4() { + return __riscv_vundefined_bf16mf4x4(); +} + +vbfloat16mf4x5_t test_vundefined_bf16mf4x5() { + return __riscv_vundefined_bf16mf4x5(); +} + +vbfloat16mf4x6_t test_vundefined_bf16mf4x6() { + return __riscv_vundefined_bf16mf4x6(); +} + +vbfloat16mf4x7_t 
test_vundefined_bf16mf4x7() { + return __riscv_vundefined_bf16mf4x7(); +} + +vbfloat16mf4x8_t test_vundefined_bf16mf4x8() { + return __riscv_vundefined_bf16mf4x8(); +} + +vbfloat16mf2x2_t test_vundefined_bf16mf2x2() { + return __riscv_vundefined_bf16mf2x2(); +} + +vbfloat16mf2x3_t test_vundefined_bf16mf2x3() { + return __riscv_vundefined_bf16mf2x3(); +} + +vbfloat16mf2x4_t test_vundefined_bf16mf2x4() { + return __riscv_vundefined_bf16mf2x4(); +} + +vbfloat16mf2x5_t test_vundefined_bf16mf2x5() { + return __riscv_vundefined_bf16mf2x5(); +} + +vbfloat16mf2x6_t test_vundefined_bf16mf2x6() { + return __riscv_vundefined_bf16mf2x6(); +} + +vbfloat16mf2x7_t test_vundefined_bf16mf2x7() { + return __riscv_vundefined_bf16mf2x7(); +} + +vbfloat16mf2x8_t test_vundefined_bf16mf2x8() { + return __riscv_vundefined_bf16mf2x8(); +} + +vbfloat16m1x2_t test_vundefined_bf16m1x2() { + return __riscv_vundefined_bf16m1x2(); +} + +vbfloat16m1x3_t test_vundefined_bf16m1x3() { + return __riscv_vundefined_bf16m1x3(); +} + +vbfloat16m1x4_t test_vundefined_bf16m1x4() { + return __riscv_vundefined_bf16m1x4(); +} + +vbfloat16m1x5_t test_vundefined_bf16m1x5() { + return __riscv_vundefined_bf16m1x5(); +} + +vbfloat16m1x6_t test_vundefined_bf16m1x6() { + return __riscv_vundefined_bf16m1x6(); +} + +vbfloat16m1x7_t test_vundefined_bf16m1x7() { + return __riscv_vundefined_bf16m1x7(); +} + +vbfloat16m1x8_t test_vundefined_bf16m1x8() { + return __riscv_vundefined_bf16m1x8(); +} + +vbfloat16m2x2_t test_vundefined_bf16m2x2() { + return __riscv_vundefined_bf16m2x2(); +} + +vbfloat16m2x3_t test_vundefined_bf16m2x3() { + return __riscv_vundefined_bf16m2x3(); +} + +vbfloat16m2x4_t test_vundefined_bf16m2x4() { + return __riscv_vundefined_bf16m2x4(); +} + +vbfloat16m4x2_t test_vundefined_bf16m4x2() { + return __riscv_vundefined_bf16m4x2(); +} diff --git a/auto-generated/bfloat16/intrinsic_funcs.adoc b/auto-generated/bfloat16/intrinsic_funcs.adoc new file mode 100644 index 000000000..3bd1a4222 --- /dev/null +++ 
b/auto-generated/bfloat16/intrinsic_funcs.adoc @@ -0,0 +1,1903 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[bf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16_v_bf16mf4(const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2(const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1(const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2(const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4(const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8(const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t vl); +---- + +[[bf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics + +[,c] +---- +void __riscv_vse16_v_bf16mf4(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vse16_v_bf16mf2(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vse16_v_bf16m1(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl); +void __riscv_vse16_v_bf16m2(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl); +void __riscv_vse16_v_bf16m4(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl); +void __riscv_vse16_v_bf16m8(__bf16 *rs1, vbfloat16m8_t vs3, size_t vl); +// masked functions +void __riscv_vse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, + 
size_t vl); +void __riscv_vse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, + size_t vl); +---- + +[[vector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[vector-strided-store]] +==== Vector Strided Store Intrinsics + +[,c] +---- +void __riscv_vsse16_v_bf16mf4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16mf2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16m1(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16m2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl); +void 
__riscv_vsse16_v_bf16m4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16m8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl); +// masked functions +void __riscv_vsse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m8_t vs3, size_t vl); +---- + +[[vector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8(const __bf16 *rs1, 
vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +---- + +[[vector-indexed-store]] +==== Vector Indexed Store Intrinsics + +[,c] +---- +void __riscv_vsoxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsoxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsoxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsoxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t 
vl); +void __riscv_vsuxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsuxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsuxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsuxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl); +// masked functions +void __riscv_vsoxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +---- + +[[unit-stride-fault-only-first-loads]] +==== 
Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8(const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- + +=== BFloat16 Vector Loads and Stores Segment Intrinsics + +[[vector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2(const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3(const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4(const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5(const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6(const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7(const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8(const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t 
__riscv_vlseg2e16_v_bf16mf2x2(const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3(const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4(const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5(const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6(const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7(const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8(const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2(const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3(const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4(const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5(const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6(const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7(const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8(const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2(const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3(const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4(const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2(const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t 
__riscv_vlseg8e16ff_v_bf16mf4x8(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t 
__riscv_vlseg3e16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t 
vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_m(vbool16_t vm, + const 
__bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- + +[[vector-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics + +[,c] +---- +void __riscv_vsseg2e16_v_bf16mf4x2(__bf16 *rs1, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsseg3e16_v_bf16mf4x3(__bf16 *rs1, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsseg4e16_v_bf16mf4x4(__bf16 *rs1, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsseg5e16_v_bf16mf4x5(__bf16 *rs1, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsseg6e16_v_bf16mf4x6(__bf16 *rs1, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsseg7e16_v_bf16mf4x7(__bf16 *rs1, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsseg8e16_v_bf16mf4x8(__bf16 *rs1, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsseg2e16_v_bf16mf2x2(__bf16 *rs1, vbfloat16mf2x2_t vs3, + size_t vl); +void 
__riscv_vsseg3e16_v_bf16mf2x3(__bf16 *rs1, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsseg4e16_v_bf16mf2x4(__bf16 *rs1, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsseg5e16_v_bf16mf2x5(__bf16 *rs1, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsseg6e16_v_bf16mf2x6(__bf16 *rs1, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsseg7e16_v_bf16mf2x7(__bf16 *rs1, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsseg8e16_v_bf16mf2x8(__bf16 *rs1, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsseg2e16_v_bf16m1x2(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m1x3(__bf16 *rs1, vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16m1x4(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16m1x5(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16m1x6(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsseg7e16_v_bf16m1x7(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16m1x8(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m2x2(__bf16 *rs1, vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m2x3(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16m2x4(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m4x2(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void __riscv_vsseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + 
vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, + vbfloat16m4x2_t vs3, size_t vl); +---- + +[[vector-strided-segment-load]] +==== Vector Strided Segment 
Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t 
__riscv_vlsseg8e16_v_bf16m1x8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t 
__riscv_vlsseg8e16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[vector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics + +[,c] +---- +void __riscv_vssseg2e16_v_bf16mf4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf4x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf4x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf4x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf4x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16mf4x7(__bf16 *rs1, ptrdiff_t rs2, + 
vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16mf4x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16mf2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf2x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf2x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16mf2x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16mf2x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m1x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m1x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16m1x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16m1x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16m1x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16m1x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16m1x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16m2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void 
__riscv_vssseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl); +void 
__riscv_vssseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl); +---- + +[[vector-indexed-segment-load]] +==== Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + 
vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2(const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6(const __bf16 
*rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t 
__riscv_vluxseg2ei16_v_bf16m4x2(const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_m(vbool16_t vm, + 
const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_m(vbool4_t vm, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t 
__riscv_vluxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t 
vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_m(vbool4_t vm, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +---- + +[[vector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics + +[,c] +---- +void __riscv_vsoxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t 
vs3, size_t vl); +void __riscv_vsoxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, 
size_t vl); +void __riscv_vsuxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void __riscv_vsoxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void 
__riscv_vsoxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x7_t vs3, + size_t vl); +void 
__riscv_vsoxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, + vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void 
__riscv_vsuxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, + vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +---- + +=== BFloat16 Convert Intrinsics + +[[bf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4(vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2(vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t 
__riscv_vfncvtbf16_f_f_w_bf16m1(vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2(vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4(vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_m(vbool64_t vm, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_m(vbool32_t vm, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm(vfloat32mf2_t vs2, + unsigned int frm, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm(vfloat32m1_t vs2, + unsigned int frm, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm(vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm(vfloat32m4_t vs2, + unsigned int frm, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm(vfloat32m8_t vs2, + unsigned int frm, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_m(vbool64_t vm, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_m(vbool32_t vm, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_m(vbool16_t vm, + vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_m(vbool8_t vm, + vfloat32m4_t vs2, + unsigned int frm, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_m(vbool4_t vm, + vfloat32m8_t vs2, + unsigned int frm, size_t vl); +---- + +[[bf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2(vbfloat16mf4_t 
vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1(vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2(vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4(vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8(vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_m(vbool64_t vm, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_m(vbool32_t vm, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_m(vbool16_t vm, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_m(vbool8_t vm, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_m(vbool4_t vm, vbfloat16m4_t vs2, + size_t vl); +---- + +=== BFloat16 Arithmetic Intrinsics + +[[bf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16_vf_f32m8(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm(vfloat32m2_t vd, 
vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16_vv_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +---- + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[reinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics + +[,c] +---- +// Reinterpret between different type under the same SEW/LMUL +vbfloat16mf4_t __riscv_vreinterpret_v_i16mf4_bf16mf4(vint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_v_i16mf2_bf16mf2(vint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_v_i16m1_bf16m1(vint16m1_t src); +vbfloat16m2_t __riscv_vreinterpret_v_i16m2_bf16m2(vint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_v_i16m4_bf16m4(vint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_v_i16m8_bf16m8(vint16m8_t src); +vbfloat16mf4_t __riscv_vreinterpret_v_u16mf4_bf16mf4(vuint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_v_u16mf2_bf16mf2(vuint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_v_u16m1_bf16m1(vuint16m1_t src); +vbfloat16m2_t __riscv_vreinterpret_v_u16m2_bf16m2(vuint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_v_u16m4_bf16m4(vuint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_v_u16m8_bf16m8(vuint16m8_t src); +vint16mf4_t __riscv_vreinterpret_v_bf16mf4_i16mf4(vbfloat16mf4_t src); +vint16mf2_t __riscv_vreinterpret_v_bf16mf2_i16mf2(vbfloat16mf2_t src); +vint16m1_t __riscv_vreinterpret_v_bf16m1_i16m1(vbfloat16m1_t src); +vint16m2_t __riscv_vreinterpret_v_bf16m2_i16m2(vbfloat16m2_t src); +vint16m4_t __riscv_vreinterpret_v_bf16m4_i16m4(vbfloat16m4_t src); +vint16m8_t __riscv_vreinterpret_v_bf16m8_i16m8(vbfloat16m8_t src); +vuint16mf4_t __riscv_vreinterpret_v_bf16mf4_u16mf4(vbfloat16mf4_t src); +vuint16mf2_t __riscv_vreinterpret_v_bf16mf2_u16mf2(vbfloat16mf2_t src); +vuint16m1_t __riscv_vreinterpret_v_bf16m1_u16m1(vbfloat16m1_t src); +vuint16m2_t 
__riscv_vreinterpret_v_bf16m2_u16m2(vbfloat16m2_t src); +vuint16m4_t __riscv_vreinterpret_v_bf16m4_u16m4(vbfloat16m4_t src); +vuint16m8_t __riscv_vreinterpret_v_bf16m8_u16m8(vbfloat16m8_t src); +---- + +[[vector-lmul-extension]] +==== Vector LMUL Extension Intrinsics + +[,c] +---- +vbfloat16mf2_t __riscv_vlmul_ext_v_bf16mf4_bf16mf2(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_v_bf16mf4_bf16m1(vbfloat16mf4_t value); +vbfloat16m2_t __riscv_vlmul_ext_v_bf16mf4_bf16m2(vbfloat16mf4_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16mf4_bf16m4(vbfloat16mf4_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16mf4_bf16m8(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_v_bf16mf2_bf16m1(vbfloat16mf2_t value); +vbfloat16m2_t __riscv_vlmul_ext_v_bf16mf2_bf16m2(vbfloat16mf2_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16mf2_bf16m4(vbfloat16mf2_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16mf2_bf16m8(vbfloat16mf2_t value); +vbfloat16m2_t __riscv_vlmul_ext_v_bf16m1_bf16m2(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16m1_bf16m4(vbfloat16m1_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16m1_bf16m8(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16m2_bf16m4(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16m2_bf16m8(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16m4_bf16m8(vbfloat16m4_t value); +---- + +[[vector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16mf2_bf16mf4(vbfloat16mf2_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16m1_bf16mf4(vbfloat16m1_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_v_bf16m1_bf16mf2(vbfloat16m1_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16m2_bf16mf4(vbfloat16m2_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_v_bf16m2_bf16mf2(vbfloat16m2_t value); +vbfloat16m1_t __riscv_vlmul_trunc_v_bf16m2_bf16m1(vbfloat16m2_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16m4_bf16mf4(vbfloat16m4_t value); +vbfloat16mf2_t 
__riscv_vlmul_trunc_v_bf16m4_bf16mf2(vbfloat16m4_t value); +vbfloat16m1_t __riscv_vlmul_trunc_v_bf16m4_bf16m1(vbfloat16m4_t value); +vbfloat16m2_t __riscv_vlmul_trunc_v_bf16m4_bf16m2(vbfloat16m4_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16m8_bf16mf4(vbfloat16m8_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_v_bf16m8_bf16mf2(vbfloat16m8_t value); +vbfloat16m1_t __riscv_vlmul_trunc_v_bf16m8_bf16m1(vbfloat16m8_t value); +vbfloat16m2_t __riscv_vlmul_trunc_v_bf16m8_bf16m2(vbfloat16m8_t value); +vbfloat16m4_t __riscv_vlmul_trunc_v_bf16m8_bf16m4(vbfloat16m8_t value); +---- + +[[vector-initialization]] +==== Vector Initialization Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vundefined_bf16mf4(); +vbfloat16mf2_t __riscv_vundefined_bf16mf2(); +vbfloat16m1_t __riscv_vundefined_bf16m1(); +vbfloat16m2_t __riscv_vundefined_bf16m2(); +vbfloat16m4_t __riscv_vundefined_bf16m4(); +vbfloat16m8_t __riscv_vundefined_bf16m8(); +vbfloat16mf4x2_t __riscv_vundefined_bf16mf4x2(); +vbfloat16mf4x3_t __riscv_vundefined_bf16mf4x3(); +vbfloat16mf4x4_t __riscv_vundefined_bf16mf4x4(); +vbfloat16mf4x5_t __riscv_vundefined_bf16mf4x5(); +vbfloat16mf4x6_t __riscv_vundefined_bf16mf4x6(); +vbfloat16mf4x7_t __riscv_vundefined_bf16mf4x7(); +vbfloat16mf4x8_t __riscv_vundefined_bf16mf4x8(); +vbfloat16mf2x2_t __riscv_vundefined_bf16mf2x2(); +vbfloat16mf2x3_t __riscv_vundefined_bf16mf2x3(); +vbfloat16mf2x4_t __riscv_vundefined_bf16mf2x4(); +vbfloat16mf2x5_t __riscv_vundefined_bf16mf2x5(); +vbfloat16mf2x6_t __riscv_vundefined_bf16mf2x6(); +vbfloat16mf2x7_t __riscv_vundefined_bf16mf2x7(); +vbfloat16mf2x8_t __riscv_vundefined_bf16mf2x8(); +vbfloat16m1x2_t __riscv_vundefined_bf16m1x2(); +vbfloat16m1x3_t __riscv_vundefined_bf16m1x3(); +vbfloat16m1x4_t __riscv_vundefined_bf16m1x4(); +vbfloat16m1x5_t __riscv_vundefined_bf16m1x5(); +vbfloat16m1x6_t __riscv_vundefined_bf16m1x6(); +vbfloat16m1x7_t __riscv_vundefined_bf16m1x7(); +vbfloat16m1x8_t __riscv_vundefined_bf16m1x8(); +vbfloat16m2x2_t 
__riscv_vundefined_bf16m2x2(); +vbfloat16m2x3_t __riscv_vundefined_bf16m2x3(); +vbfloat16m2x4_t __riscv_vundefined_bf16m2x4(); +vbfloat16m4x2_t __riscv_vundefined_bf16m4x2(); +---- + +[[vector-insertion]] +==== Vector Insertion Intrinsics + +[,c] +---- +vbfloat16m2_t __riscv_vset_v_bf16m1_bf16m2(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset_v_bf16m1_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset_v_bf16m2_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset_v_bf16m1_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m8_t __riscv_vset_v_bf16m2_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset_v_bf16m4_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value); +vbfloat16mf4x2_t __riscv_vset_v_bf16mf4_bf16mf4x2(vbfloat16mf4x2_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x3_t __riscv_vset_v_bf16mf4_bf16mf4x3(vbfloat16mf4x3_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x4_t __riscv_vset_v_bf16mf4_bf16mf4x4(vbfloat16mf4x4_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x5_t __riscv_vset_v_bf16mf4_bf16mf4x5(vbfloat16mf4x5_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x6_t __riscv_vset_v_bf16mf4_bf16mf4x6(vbfloat16mf4x6_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x7_t __riscv_vset_v_bf16mf4_bf16mf4x7(vbfloat16mf4x7_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x8_t __riscv_vset_v_bf16mf4_bf16mf4x8(vbfloat16mf4x8_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf2x2_t __riscv_vset_v_bf16mf2_bf16mf2x2(vbfloat16mf2x2_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x3_t __riscv_vset_v_bf16mf2_bf16mf2x3(vbfloat16mf2x3_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x4_t __riscv_vset_v_bf16mf2_bf16mf2x4(vbfloat16mf2x4_t dest, + size_t index, + 
vbfloat16mf2_t value); +vbfloat16mf2x5_t __riscv_vset_v_bf16mf2_bf16mf2x5(vbfloat16mf2x5_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x6_t __riscv_vset_v_bf16mf2_bf16mf2x6(vbfloat16mf2x6_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x7_t __riscv_vset_v_bf16mf2_bf16mf2x7(vbfloat16mf2x7_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x8_t __riscv_vset_v_bf16mf2_bf16mf2x8(vbfloat16mf2x8_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16m1x2_t __riscv_vset_v_bf16m1_bf16m1x2(vbfloat16m1x2_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x3_t __riscv_vset_v_bf16m1_bf16m1x3(vbfloat16m1x3_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x4_t __riscv_vset_v_bf16m1_bf16m1x4(vbfloat16m1x4_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x5_t __riscv_vset_v_bf16m1_bf16m1x5(vbfloat16m1x5_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x6_t __riscv_vset_v_bf16m1_bf16m1x6(vbfloat16m1x6_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x7_t __riscv_vset_v_bf16m1_bf16m1x7(vbfloat16m1x7_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x8_t __riscv_vset_v_bf16m1_bf16m1x8(vbfloat16m1x8_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m2x2_t __riscv_vset_v_bf16m2_bf16m2x2(vbfloat16m2x2_t dest, + size_t index, + vbfloat16m2_t value); +vbfloat16m2x3_t __riscv_vset_v_bf16m2_bf16m2x3(vbfloat16m2x3_t dest, + size_t index, + vbfloat16m2_t value); +vbfloat16m2x4_t __riscv_vset_v_bf16m2_bf16m2x4(vbfloat16m2x4_t dest, + size_t index, + vbfloat16m2_t value); +vbfloat16m4x2_t __riscv_vset_v_bf16m4_bf16m4x2(vbfloat16m4x2_t dest, + size_t index, + vbfloat16m4_t value); +---- + +[[vector-extraction]] +==== Vector Extraction Intrinsics + +[,c] +---- +vbfloat16m1_t __riscv_vget_v_bf16m2_bf16m1(vbfloat16m2_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m4_bf16m1(vbfloat16m4_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m8_bf16m1(vbfloat16m8_t src, size_t 
index); +vbfloat16m2_t __riscv_vget_v_bf16m4_bf16m2(vbfloat16m4_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m8_bf16m2(vbfloat16m8_t src, size_t index); +vbfloat16m4_t __riscv_vget_v_bf16m8_bf16m4(vbfloat16m8_t src, size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x2_bf16mf4(vbfloat16mf4x2_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x3_bf16mf4(vbfloat16mf4x3_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x4_bf16mf4(vbfloat16mf4x4_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x5_bf16mf4(vbfloat16mf4x5_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x6_bf16mf4(vbfloat16mf4x6_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x7_bf16mf4(vbfloat16mf4x7_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x8_bf16mf4(vbfloat16mf4x8_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x2_bf16mf2(vbfloat16mf2x2_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x3_bf16mf2(vbfloat16mf2x3_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x4_bf16mf2(vbfloat16mf2x4_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x5_bf16mf2(vbfloat16mf2x5_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x6_bf16mf2(vbfloat16mf2x6_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x7_bf16mf2(vbfloat16mf2x7_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x8_bf16mf2(vbfloat16mf2x8_t src, + size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x2_bf16m1(vbfloat16m1x2_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x3_bf16m1(vbfloat16m1x3_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x4_bf16m1(vbfloat16m1x4_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x5_bf16m1(vbfloat16m1x5_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x6_bf16m1(vbfloat16m1x6_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x7_bf16m1(vbfloat16m1x7_t src, size_t index); +vbfloat16m1_t 
__riscv_vget_v_bf16m1x8_bf16m1(vbfloat16m1x8_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m2x2_bf16m2(vbfloat16m2x2_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m2x3_bf16m2(vbfloat16m2x3_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m2x4_bf16m2(vbfloat16m2x4_t src, size_t index); +vbfloat16m4_t __riscv_vget_v_bf16m4x2_bf16m4(vbfloat16m4x2_t src, size_t index); +---- + +[[vector-creation]] +==== Vector Creation Intrinsics + +[,c] +---- +vbfloat16m2_t __riscv_vcreate_v_bf16m1_bf16m2(vbfloat16m1_t v0, + vbfloat16m1_t v1); +vbfloat16m4_t __riscv_vcreate_v_bf16m1_bf16m4(vbfloat16m1_t v0, + vbfloat16m1_t v1, + vbfloat16m1_t v2, + vbfloat16m1_t v3); +vbfloat16m8_t __riscv_vcreate_v_bf16m1_bf16m8( + vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, vbfloat16m1_t v6, vbfloat16m1_t v7); +vbfloat16m4_t __riscv_vcreate_v_bf16m2_bf16m4(vbfloat16m2_t v0, + vbfloat16m2_t v1); +vbfloat16m8_t __riscv_vcreate_v_bf16m2_bf16m8(vbfloat16m2_t v0, + vbfloat16m2_t v1, + vbfloat16m2_t v2, + vbfloat16m2_t v3); +vbfloat16m8_t __riscv_vcreate_v_bf16m4_bf16m8(vbfloat16m4_t v0, + vbfloat16m4_t v1); +vbfloat16mf4x2_t __riscv_vcreate_v_bf16mf4x2(vbfloat16mf4_t v0, + vbfloat16mf4_t v1); +vbfloat16mf4x3_t __riscv_vcreate_v_bf16mf4x3(vbfloat16mf4_t v0, + vbfloat16mf4_t v1, + vbfloat16mf4_t v2); +vbfloat16mf4x4_t __riscv_vcreate_v_bf16mf4x4(vbfloat16mf4_t v0, + vbfloat16mf4_t v1, + vbfloat16mf4_t v2, + vbfloat16mf4_t v3); +vbfloat16mf4x5_t __riscv_vcreate_v_bf16mf4x5(vbfloat16mf4_t v0, + vbfloat16mf4_t v1, + vbfloat16mf4_t v2, + vbfloat16mf4_t v3, + vbfloat16mf4_t v4); +vbfloat16mf4x6_t +__riscv_vcreate_v_bf16mf4x6(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5); +vbfloat16mf4x7_t __riscv_vcreate_v_bf16mf4x7( + vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, vbfloat16mf4_t 
v6); +vbfloat16mf4x8_t __riscv_vcreate_v_bf16mf4x8( + vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, vbfloat16mf4_t v6, vbfloat16mf4_t v7); +vbfloat16mf2x2_t __riscv_vcreate_v_bf16mf2x2(vbfloat16mf2_t v0, + vbfloat16mf2_t v1); +vbfloat16mf2x3_t __riscv_vcreate_v_bf16mf2x3(vbfloat16mf2_t v0, + vbfloat16mf2_t v1, + vbfloat16mf2_t v2); +vbfloat16mf2x4_t __riscv_vcreate_v_bf16mf2x4(vbfloat16mf2_t v0, + vbfloat16mf2_t v1, + vbfloat16mf2_t v2, + vbfloat16mf2_t v3); +vbfloat16mf2x5_t __riscv_vcreate_v_bf16mf2x5(vbfloat16mf2_t v0, + vbfloat16mf2_t v1, + vbfloat16mf2_t v2, + vbfloat16mf2_t v3, + vbfloat16mf2_t v4); +vbfloat16mf2x6_t +__riscv_vcreate_v_bf16mf2x6(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5); +vbfloat16mf2x7_t __riscv_vcreate_v_bf16mf2x7( + vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, vbfloat16mf2_t v6); +vbfloat16mf2x8_t __riscv_vcreate_v_bf16mf2x8( + vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, vbfloat16mf2_t v6, vbfloat16mf2_t v7); +vbfloat16m1x2_t __riscv_vcreate_v_bf16m1x2(vbfloat16m1_t v0, vbfloat16m1_t v1); +vbfloat16m1x3_t __riscv_vcreate_v_bf16m1x3(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2); +vbfloat16m1x4_t __riscv_vcreate_v_bf16m1x4(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3); +vbfloat16m1x5_t __riscv_vcreate_v_bf16m1x5(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4); +vbfloat16m1x6_t __riscv_vcreate_v_bf16m1x6(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5); +vbfloat16m1x7_t __riscv_vcreate_v_bf16m1x7(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + 
vbfloat16m1_t v6); +vbfloat16m1x8_t __riscv_vcreate_v_bf16m1x8(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6, vbfloat16m1_t v7); +vbfloat16m2x2_t __riscv_vcreate_v_bf16m2x2(vbfloat16m2_t v0, vbfloat16m2_t v1); +vbfloat16m2x3_t __riscv_vcreate_v_bf16m2x3(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2); +vbfloat16m2x4_t __riscv_vcreate_v_bf16m2x4(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2, vbfloat16m2_t v3); +vbfloat16m4x2_t __riscv_vcreate_v_bf16m4x2(vbfloat16m4_t v0, vbfloat16m4_t v1); +---- diff --git a/auto-generated/bfloat16/intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc b/auto-generated/bfloat16/intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc new file mode 100644 index 000000000..db9f6077c --- /dev/null +++ b/auto-generated/bfloat16/intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc @@ -0,0 +1,262 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[bf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16_v_bf16mf4(const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2(const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1(const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2(const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4(const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8(const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t vl); 
+vbfloat16m8_t __riscv_vle16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t vl); +---- + +[[bf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics + +[,c] +---- +void __riscv_vse16_v_bf16mf4(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vse16_v_bf16mf2(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vse16_v_bf16m1(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl); +void __riscv_vse16_v_bf16m2(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl); +void __riscv_vse16_v_bf16m4(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl); +void __riscv_vse16_v_bf16m8(__bf16 *rs1, vbfloat16m8_t vs3, size_t vl); +// masked functions +void __riscv_vse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, + size_t vl); +---- + +[[vector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_m(vbool32_t vm, const 
__bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[vector-strided-store]] +==== Vector Strided Store Intrinsics + +[,c] +---- +void __riscv_vsse16_v_bf16mf4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16mf2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16m1(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16m2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16m4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsse16_v_bf16m8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl); +// masked functions +void __riscv_vsse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m8_t vs3, size_t vl); +---- + +[[vector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2(const __bf16 *rs1, 
vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_m(vbool8_t vm, 
const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +---- + +[[vector-indexed-store]] +==== Vector Indexed Store Intrinsics + +[,c] +---- +void __riscv_vsoxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsoxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsoxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsoxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl); +void __riscv_vsuxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsuxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsuxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsuxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl); +// masked functions +void __riscv_vsoxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void 
__riscv_vsoxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsoxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsuxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +---- + +[[unit-stride-fault-only-first-loads]] +==== Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4(const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8(const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4_t 
__riscv_vle16ff_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- diff --git a/auto-generated/bfloat16/intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc b/auto-generated/bfloat16/intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc new file mode 100644 index 000000000..48e19775a --- /dev/null +++ b/auto-generated/bfloat16/intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc @@ -0,0 +1,1077 @@ + +=== BFloat16 Vector Loads and Stores Segment Intrinsics + +[[vector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2(const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3(const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4(const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5(const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6(const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7(const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8(const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2(const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3(const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4(const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5(const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6(const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7(const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8(const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2(const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3(const 
__bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4(const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5(const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6(const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7(const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8(const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2(const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3(const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4(const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2(const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7(const __bf16 *rs1, + size_t *new_vl, size_t 
vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4(const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2(const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t 
__riscv_vlseg3e16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t 
__riscv_vlseg5e16ff_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t 
__riscv_vlseg8e16ff_v_bf16m1x8_m(vbool16_t vm, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- + +[[vecrtor-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics + +[,c] +---- +void __riscv_vsseg2e16_v_bf16mf4x2(__bf16 *rs1, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsseg3e16_v_bf16mf4x3(__bf16 *rs1, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsseg4e16_v_bf16mf4x4(__bf16 *rs1, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsseg5e16_v_bf16mf4x5(__bf16 *rs1, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsseg6e16_v_bf16mf4x6(__bf16 *rs1, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsseg7e16_v_bf16mf4x7(__bf16 *rs1, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsseg8e16_v_bf16mf4x8(__bf16 *rs1, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsseg2e16_v_bf16mf2x2(__bf16 *rs1, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsseg3e16_v_bf16mf2x3(__bf16 *rs1, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsseg4e16_v_bf16mf2x4(__bf16 *rs1, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsseg5e16_v_bf16mf2x5(__bf16 *rs1, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsseg6e16_v_bf16mf2x6(__bf16 *rs1, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsseg7e16_v_bf16mf2x7(__bf16 *rs1, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsseg8e16_v_bf16mf2x8(__bf16 *rs1, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsseg2e16_v_bf16m1x2(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m1x3(__bf16 *rs1, vbfloat16m1x3_t vs3, 
size_t vl); +void __riscv_vsseg4e16_v_bf16m1x4(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16m1x5(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16m1x6(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsseg7e16_v_bf16m1x7(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16m1x8(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m2x2(__bf16 *rs1, vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m2x3(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16m2x4(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m4x2(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void __riscv_vsseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x6_t vs3, size_t vl); +void 
__riscv_vsseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, + vbfloat16m4x2_t vs3, size_t vl); +---- + +[[vector-strided-segment-load]] +==== Vector Strided Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t 
__riscv_vlsseg8e16_v_bf16mf4x8(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8(const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_m(vbool64_t 
vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); 
+vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[vector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics + +[,c] +---- +void __riscv_vssseg2e16_v_bf16mf4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf4x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf4x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf4x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf4x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16mf4x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16mf4x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16mf2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf2x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf2x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl); +void 
__riscv_vssseg7e16_v_bf16mf2x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16mf2x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m1x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m1x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16m1x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16m1x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16m1x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16m1x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16m1x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16m2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void __riscv_vssseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, 
size_t vl); +void __riscv_vssseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vssseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vssseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vssseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vssseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vssseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vssseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl); +void 
__riscv_vssseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vssseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl); +---- + +[[vector-indexed-segment-load]] +==== Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t 
__riscv_vloxseg4ei16_v_bf16m1x4(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2(const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); 
+vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8(const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4(const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2(const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + 
vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t 
__riscv_vloxseg2ei16_v_bf16m2x2_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_m(vbool4_t vm, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, 
+ vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_m(vbool16_t vm, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_m(vbool8_t vm, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_m(vbool4_t vm, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +---- + +[[vector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics + +[,c] +---- +void __riscv_vsoxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void 
__riscv_vsoxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void 
__riscv_vsoxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl); +void 
__riscv_vsuxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void __riscv_vsoxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void 
__riscv_vsoxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, + vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, 
__bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, + 
vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, + vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, + vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, + vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +---- diff --git a/auto-generated/bfloat16/intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc b/auto-generated/bfloat16/intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc new file mode 100644 index 000000000..a6e7b0277 --- /dev/null +++ b/auto-generated/bfloat16/intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc @@ -0,0 +1,76 @@ + +=== BFloat16 Convert Intrinsics + +[[bf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4(vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2(vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1(vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2(vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4(vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_m(vbool64_t vm, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t 
__riscv_vfncvtbf16_f_f_w_bf16mf2_m(vbool32_t vm, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm(vfloat32mf2_t vs2, + unsigned int frm, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm(vfloat32m1_t vs2, + unsigned int frm, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm(vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm(vfloat32m4_t vs2, + unsigned int frm, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm(vfloat32m8_t vs2, + unsigned int frm, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_m(vbool64_t vm, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_m(vbool32_t vm, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_m(vbool16_t vm, + vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_m(vbool8_t vm, + vfloat32m4_t vs2, + unsigned int frm, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_m(vbool4_t vm, + vfloat32m8_t vs2, + unsigned int frm, size_t vl); +---- + +[[bf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2(vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1(vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2(vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4(vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8(vbfloat16m4_t vs2, size_t vl); +// masked functions 
+vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_m(vbool64_t vm, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_m(vbool32_t vm, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_m(vbool16_t vm, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_m(vbool8_t vm, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_m(vbool4_t vm, vbfloat16m4_t vs2, + size_t vl); +---- diff --git a/auto-generated/bfloat16/intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc b/auto-generated/bfloat16/intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc new file mode 100644 index 000000000..830e11a4b --- /dev/null +++ b/auto-generated/bfloat16/intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc @@ -0,0 +1,129 @@ + +=== BFloat16 Arithmetic Intrinsics + +[[bf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16_vf_f32m8(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm(vfloat32m2_t vd, 
vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16_vv_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +---- diff --git a/auto-generated/bfloat16/intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc b/auto-generated/bfloat16/intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc new file mode 100644 index 000000000..9843290f7 --- /dev/null +++ b/auto-generated/bfloat16/intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc @@ -0,0 +1,359 @@ + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[reinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics + +[,c] +---- +// Reinterpret between different type under the same SEW/LMUL +vbfloat16mf4_t __riscv_vreinterpret_v_i16mf4_bf16mf4(vint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_v_i16mf2_bf16mf2(vint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_v_i16m1_bf16m1(vint16m1_t src); +vbfloat16m2_t __riscv_vreinterpret_v_i16m2_bf16m2(vint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_v_i16m4_bf16m4(vint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_v_i16m8_bf16m8(vint16m8_t src); +vbfloat16mf4_t __riscv_vreinterpret_v_u16mf4_bf16mf4(vuint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_v_u16mf2_bf16mf2(vuint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_v_u16m1_bf16m1(vuint16m1_t src); +vbfloat16m2_t __riscv_vreinterpret_v_u16m2_bf16m2(vuint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_v_u16m4_bf16m4(vuint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_v_u16m8_bf16m8(vuint16m8_t src); +vint16mf4_t __riscv_vreinterpret_v_bf16mf4_i16mf4(vbfloat16mf4_t src); +vint16mf2_t __riscv_vreinterpret_v_bf16mf2_i16mf2(vbfloat16mf2_t src); +vint16m1_t __riscv_vreinterpret_v_bf16m1_i16m1(vbfloat16m1_t src); +vint16m2_t 
__riscv_vreinterpret_v_bf16m2_i16m2(vbfloat16m2_t src); +vint16m4_t __riscv_vreinterpret_v_bf16m4_i16m4(vbfloat16m4_t src); +vint16m8_t __riscv_vreinterpret_v_bf16m8_i16m8(vbfloat16m8_t src); +vuint16mf4_t __riscv_vreinterpret_v_bf16mf4_u16mf4(vbfloat16mf4_t src); +vuint16mf2_t __riscv_vreinterpret_v_bf16mf2_u16mf2(vbfloat16mf2_t src); +vuint16m1_t __riscv_vreinterpret_v_bf16m1_u16m1(vbfloat16m1_t src); +vuint16m2_t __riscv_vreinterpret_v_bf16m2_u16m2(vbfloat16m2_t src); +vuint16m4_t __riscv_vreinterpret_v_bf16m4_u16m4(vbfloat16m4_t src); +vuint16m8_t __riscv_vreinterpret_v_bf16m8_u16m8(vbfloat16m8_t src); +---- + +[[vector-lmul-extensionn]] +==== Vector LMUL Extension Intrinsics + +[,c] +---- +vbfloat16mf2_t __riscv_vlmul_ext_v_bf16mf4_bf16mf2(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_v_bf16mf4_bf16m1(vbfloat16mf4_t value); +vbfloat16m2_t __riscv_vlmul_ext_v_bf16mf4_bf16m2(vbfloat16mf4_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16mf4_bf16m4(vbfloat16mf4_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16mf4_bf16m8(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_v_bf16mf2_bf16m1(vbfloat16mf2_t value); +vbfloat16m2_t __riscv_vlmul_ext_v_bf16mf2_bf16m2(vbfloat16mf2_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16mf2_bf16m4(vbfloat16mf2_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16mf2_bf16m8(vbfloat16mf2_t value); +vbfloat16m2_t __riscv_vlmul_ext_v_bf16m1_bf16m2(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16m1_bf16m4(vbfloat16m1_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16m1_bf16m8(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_v_bf16m2_bf16m4(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16m2_bf16m8(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_v_bf16m4_bf16m8(vbfloat16m4_t value); +---- + +[[vector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16mf2_bf16mf4(vbfloat16mf2_t value); +vbfloat16mf4_t 
__riscv_vlmul_trunc_v_bf16m1_bf16mf4(vbfloat16m1_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_v_bf16m1_bf16mf2(vbfloat16m1_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16m2_bf16mf4(vbfloat16m2_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_v_bf16m2_bf16mf2(vbfloat16m2_t value); +vbfloat16m1_t __riscv_vlmul_trunc_v_bf16m2_bf16m1(vbfloat16m2_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16m4_bf16mf4(vbfloat16m4_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_v_bf16m4_bf16mf2(vbfloat16m4_t value); +vbfloat16m1_t __riscv_vlmul_trunc_v_bf16m4_bf16m1(vbfloat16m4_t value); +vbfloat16m2_t __riscv_vlmul_trunc_v_bf16m4_bf16m2(vbfloat16m4_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_v_bf16m8_bf16mf4(vbfloat16m8_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_v_bf16m8_bf16mf2(vbfloat16m8_t value); +vbfloat16m1_t __riscv_vlmul_trunc_v_bf16m8_bf16m1(vbfloat16m8_t value); +vbfloat16m2_t __riscv_vlmul_trunc_v_bf16m8_bf16m2(vbfloat16m8_t value); +vbfloat16m4_t __riscv_vlmul_trunc_v_bf16m8_bf16m4(vbfloat16m8_t value); +---- + +[[vector-initialization]] +==== Vector Initialization Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vundefined_bf16mf4(); +vbfloat16mf2_t __riscv_vundefined_bf16mf2(); +vbfloat16m1_t __riscv_vundefined_bf16m1(); +vbfloat16m2_t __riscv_vundefined_bf16m2(); +vbfloat16m4_t __riscv_vundefined_bf16m4(); +vbfloat16m8_t __riscv_vundefined_bf16m8(); +vbfloat16mf4x2_t __riscv_vundefined_bf16mf4x2(); +vbfloat16mf4x3_t __riscv_vundefined_bf16mf4x3(); +vbfloat16mf4x4_t __riscv_vundefined_bf16mf4x4(); +vbfloat16mf4x5_t __riscv_vundefined_bf16mf4x5(); +vbfloat16mf4x6_t __riscv_vundefined_bf16mf4x6(); +vbfloat16mf4x7_t __riscv_vundefined_bf16mf4x7(); +vbfloat16mf4x8_t __riscv_vundefined_bf16mf4x8(); +vbfloat16mf2x2_t __riscv_vundefined_bf16mf2x2(); +vbfloat16mf2x3_t __riscv_vundefined_bf16mf2x3(); +vbfloat16mf2x4_t __riscv_vundefined_bf16mf2x4(); +vbfloat16mf2x5_t __riscv_vundefined_bf16mf2x5(); +vbfloat16mf2x6_t __riscv_vundefined_bf16mf2x6(); +vbfloat16mf2x7_t 
__riscv_vundefined_bf16mf2x7(); +vbfloat16mf2x8_t __riscv_vundefined_bf16mf2x8(); +vbfloat16m1x2_t __riscv_vundefined_bf16m1x2(); +vbfloat16m1x3_t __riscv_vundefined_bf16m1x3(); +vbfloat16m1x4_t __riscv_vundefined_bf16m1x4(); +vbfloat16m1x5_t __riscv_vundefined_bf16m1x5(); +vbfloat16m1x6_t __riscv_vundefined_bf16m1x6(); +vbfloat16m1x7_t __riscv_vundefined_bf16m1x7(); +vbfloat16m1x8_t __riscv_vundefined_bf16m1x8(); +vbfloat16m2x2_t __riscv_vundefined_bf16m2x2(); +vbfloat16m2x3_t __riscv_vundefined_bf16m2x3(); +vbfloat16m2x4_t __riscv_vundefined_bf16m2x4(); +vbfloat16m4x2_t __riscv_vundefined_bf16m4x2(); +---- + +[[vector-insertion]] +==== Vector Insertion Intrinsics + +[,c] +---- +vbfloat16m2_t __riscv_vset_v_bf16m1_bf16m2(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset_v_bf16m1_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset_v_bf16m2_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset_v_bf16m1_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m8_t __riscv_vset_v_bf16m2_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset_v_bf16m4_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value); +vbfloat16mf4x2_t __riscv_vset_v_bf16mf4_bf16mf4x2(vbfloat16mf4x2_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x3_t __riscv_vset_v_bf16mf4_bf16mf4x3(vbfloat16mf4x3_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x4_t __riscv_vset_v_bf16mf4_bf16mf4x4(vbfloat16mf4x4_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x5_t __riscv_vset_v_bf16mf4_bf16mf4x5(vbfloat16mf4x5_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x6_t __riscv_vset_v_bf16mf4_bf16mf4x6(vbfloat16mf4x6_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x7_t __riscv_vset_v_bf16mf4_bf16mf4x7(vbfloat16mf4x7_t dest, + size_t index, + vbfloat16mf4_t value); 
+vbfloat16mf4x8_t __riscv_vset_v_bf16mf4_bf16mf4x8(vbfloat16mf4x8_t dest, + size_t index, + vbfloat16mf4_t value); +vbfloat16mf2x2_t __riscv_vset_v_bf16mf2_bf16mf2x2(vbfloat16mf2x2_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x3_t __riscv_vset_v_bf16mf2_bf16mf2x3(vbfloat16mf2x3_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x4_t __riscv_vset_v_bf16mf2_bf16mf2x4(vbfloat16mf2x4_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x5_t __riscv_vset_v_bf16mf2_bf16mf2x5(vbfloat16mf2x5_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x6_t __riscv_vset_v_bf16mf2_bf16mf2x6(vbfloat16mf2x6_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x7_t __riscv_vset_v_bf16mf2_bf16mf2x7(vbfloat16mf2x7_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x8_t __riscv_vset_v_bf16mf2_bf16mf2x8(vbfloat16mf2x8_t dest, + size_t index, + vbfloat16mf2_t value); +vbfloat16m1x2_t __riscv_vset_v_bf16m1_bf16m1x2(vbfloat16m1x2_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x3_t __riscv_vset_v_bf16m1_bf16m1x3(vbfloat16m1x3_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x4_t __riscv_vset_v_bf16m1_bf16m1x4(vbfloat16m1x4_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x5_t __riscv_vset_v_bf16m1_bf16m1x5(vbfloat16m1x5_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x6_t __riscv_vset_v_bf16m1_bf16m1x6(vbfloat16m1x6_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x7_t __riscv_vset_v_bf16m1_bf16m1x7(vbfloat16m1x7_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m1x8_t __riscv_vset_v_bf16m1_bf16m1x8(vbfloat16m1x8_t dest, + size_t index, + vbfloat16m1_t value); +vbfloat16m2x2_t __riscv_vset_v_bf16m2_bf16m2x2(vbfloat16m2x2_t dest, + size_t index, + vbfloat16m2_t value); +vbfloat16m2x3_t __riscv_vset_v_bf16m2_bf16m2x3(vbfloat16m2x3_t dest, + size_t index, + vbfloat16m2_t value); +vbfloat16m2x4_t __riscv_vset_v_bf16m2_bf16m2x4(vbfloat16m2x4_t dest, + size_t index, + 
vbfloat16m2_t value); +vbfloat16m4x2_t __riscv_vset_v_bf16m4_bf16m4x2(vbfloat16m4x2_t dest, + size_t index, + vbfloat16m4_t value); +---- + +[[vector-extraction]] +==== Vector Extraction Intrinsics + +[,c] +---- +vbfloat16m1_t __riscv_vget_v_bf16m2_bf16m1(vbfloat16m2_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m4_bf16m1(vbfloat16m4_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m8_bf16m1(vbfloat16m8_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m4_bf16m2(vbfloat16m4_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m8_bf16m2(vbfloat16m8_t src, size_t index); +vbfloat16m4_t __riscv_vget_v_bf16m8_bf16m4(vbfloat16m8_t src, size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x2_bf16mf4(vbfloat16mf4x2_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x3_bf16mf4(vbfloat16mf4x3_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x4_bf16mf4(vbfloat16mf4x4_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x5_bf16mf4(vbfloat16mf4x5_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x6_bf16mf4(vbfloat16mf4x6_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x7_bf16mf4(vbfloat16mf4x7_t src, + size_t index); +vbfloat16mf4_t __riscv_vget_v_bf16mf4x8_bf16mf4(vbfloat16mf4x8_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x2_bf16mf2(vbfloat16mf2x2_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x3_bf16mf2(vbfloat16mf2x3_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x4_bf16mf2(vbfloat16mf2x4_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x5_bf16mf2(vbfloat16mf2x5_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x6_bf16mf2(vbfloat16mf2x6_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x7_bf16mf2(vbfloat16mf2x7_t src, + size_t index); +vbfloat16mf2_t __riscv_vget_v_bf16mf2x8_bf16mf2(vbfloat16mf2x8_t src, + size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x2_bf16m1(vbfloat16m1x2_t src, size_t index); 
+vbfloat16m1_t __riscv_vget_v_bf16m1x3_bf16m1(vbfloat16m1x3_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x4_bf16m1(vbfloat16m1x4_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x5_bf16m1(vbfloat16m1x5_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x6_bf16m1(vbfloat16m1x6_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x7_bf16m1(vbfloat16m1x7_t src, size_t index); +vbfloat16m1_t __riscv_vget_v_bf16m1x8_bf16m1(vbfloat16m1x8_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m2x2_bf16m2(vbfloat16m2x2_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m2x3_bf16m2(vbfloat16m2x3_t src, size_t index); +vbfloat16m2_t __riscv_vget_v_bf16m2x4_bf16m2(vbfloat16m2x4_t src, size_t index); +vbfloat16m4_t __riscv_vget_v_bf16m4x2_bf16m4(vbfloat16m4x2_t src, size_t index); +---- + +[[vector-creation]] +==== Vector Creation Intrinsics + +[,c] +---- +vbfloat16m2_t __riscv_vcreate_v_bf16m1_bf16m2(vbfloat16m1_t v0, + vbfloat16m1_t v1); +vbfloat16m4_t __riscv_vcreate_v_bf16m1_bf16m4(vbfloat16m1_t v0, + vbfloat16m1_t v1, + vbfloat16m1_t v2, + vbfloat16m1_t v3); +vbfloat16m8_t __riscv_vcreate_v_bf16m1_bf16m8( + vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, vbfloat16m1_t v6, vbfloat16m1_t v7); +vbfloat16m4_t __riscv_vcreate_v_bf16m2_bf16m4(vbfloat16m2_t v0, + vbfloat16m2_t v1); +vbfloat16m8_t __riscv_vcreate_v_bf16m2_bf16m8(vbfloat16m2_t v0, + vbfloat16m2_t v1, + vbfloat16m2_t v2, + vbfloat16m2_t v3); +vbfloat16m8_t __riscv_vcreate_v_bf16m4_bf16m8(vbfloat16m4_t v0, + vbfloat16m4_t v1); +vbfloat16mf4x2_t __riscv_vcreate_v_bf16mf4x2(vbfloat16mf4_t v0, + vbfloat16mf4_t v1); +vbfloat16mf4x3_t __riscv_vcreate_v_bf16mf4x3(vbfloat16mf4_t v0, + vbfloat16mf4_t v1, + vbfloat16mf4_t v2); +vbfloat16mf4x4_t __riscv_vcreate_v_bf16mf4x4(vbfloat16mf4_t v0, + vbfloat16mf4_t v1, + vbfloat16mf4_t v2, + vbfloat16mf4_t v3); +vbfloat16mf4x5_t __riscv_vcreate_v_bf16mf4x5(vbfloat16mf4_t v0, + 
vbfloat16mf4_t v1, + vbfloat16mf4_t v2, + vbfloat16mf4_t v3, + vbfloat16mf4_t v4); +vbfloat16mf4x6_t +__riscv_vcreate_v_bf16mf4x6(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5); +vbfloat16mf4x7_t __riscv_vcreate_v_bf16mf4x7( + vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, vbfloat16mf4_t v6); +vbfloat16mf4x8_t __riscv_vcreate_v_bf16mf4x8( + vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, vbfloat16mf4_t v6, vbfloat16mf4_t v7); +vbfloat16mf2x2_t __riscv_vcreate_v_bf16mf2x2(vbfloat16mf2_t v0, + vbfloat16mf2_t v1); +vbfloat16mf2x3_t __riscv_vcreate_v_bf16mf2x3(vbfloat16mf2_t v0, + vbfloat16mf2_t v1, + vbfloat16mf2_t v2); +vbfloat16mf2x4_t __riscv_vcreate_v_bf16mf2x4(vbfloat16mf2_t v0, + vbfloat16mf2_t v1, + vbfloat16mf2_t v2, + vbfloat16mf2_t v3); +vbfloat16mf2x5_t __riscv_vcreate_v_bf16mf2x5(vbfloat16mf2_t v0, + vbfloat16mf2_t v1, + vbfloat16mf2_t v2, + vbfloat16mf2_t v3, + vbfloat16mf2_t v4); +vbfloat16mf2x6_t +__riscv_vcreate_v_bf16mf2x6(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5); +vbfloat16mf2x7_t __riscv_vcreate_v_bf16mf2x7( + vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, vbfloat16mf2_t v6); +vbfloat16mf2x8_t __riscv_vcreate_v_bf16mf2x8( + vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, vbfloat16mf2_t v6, vbfloat16mf2_t v7); +vbfloat16m1x2_t __riscv_vcreate_v_bf16m1x2(vbfloat16m1_t v0, vbfloat16m1_t v1); +vbfloat16m1x3_t __riscv_vcreate_v_bf16m1x3(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2); +vbfloat16m1x4_t __riscv_vcreate_v_bf16m1x4(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3); +vbfloat16m1x5_t 
__riscv_vcreate_v_bf16m1x5(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4); +vbfloat16m1x6_t __riscv_vcreate_v_bf16m1x6(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5); +vbfloat16m1x7_t __riscv_vcreate_v_bf16m1x7(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6); +vbfloat16m1x8_t __riscv_vcreate_v_bf16m1x8(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6, vbfloat16m1_t v7); +vbfloat16m2x2_t __riscv_vcreate_v_bf16m2x2(vbfloat16m2_t v0, vbfloat16m2_t v1); +vbfloat16m2x3_t __riscv_vcreate_v_bf16m2x3(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2); +vbfloat16m2x4_t __riscv_vcreate_v_bf16m2x4(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2, vbfloat16m2_t v3); +vbfloat16m4x2_t __riscv_vcreate_v_bf16m4x2(vbfloat16m4_t v0, vbfloat16m4_t v1); +---- diff --git a/auto-generated/bfloat16/llvm-api-tests/vcreate.c b/auto-generated/bfloat16/llvm-api-tests/vcreate.c new file mode 100644 index 000000000..5b5dab6a8 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vcreate.c @@ -0,0 +1,184 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16m2_t test_vcreate_v_bf16m1_bf16m2(vbfloat16m1_t v0, vbfloat16m1_t v1) { + return __riscv_vcreate_v_bf16m1_bf16m2(v0, v1); +} + +vbfloat16m4_t test_vcreate_v_bf16m1_bf16m4(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3) { + return __riscv_vcreate_v_bf16m1_bf16m4(v0, v1, v2, 
v3); +} + +vbfloat16m8_t test_vcreate_v_bf16m1_bf16m8(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6, vbfloat16m1_t v7) { + return __riscv_vcreate_v_bf16m1_bf16m8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16m4_t test_vcreate_v_bf16m2_bf16m4(vbfloat16m2_t v0, vbfloat16m2_t v1) { + return __riscv_vcreate_v_bf16m2_bf16m4(v0, v1); +} + +vbfloat16m8_t test_vcreate_v_bf16m2_bf16m8(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2, vbfloat16m2_t v3) { + return __riscv_vcreate_v_bf16m2_bf16m8(v0, v1, v2, v3); +} + +vbfloat16m8_t test_vcreate_v_bf16m4_bf16m8(vbfloat16m4_t v0, vbfloat16m4_t v1) { + return __riscv_vcreate_v_bf16m4_bf16m8(v0, v1); +} + +vbfloat16mf4x2_t test_vcreate_v_bf16mf4x2(vbfloat16mf4_t v0, + vbfloat16mf4_t v1) { + return __riscv_vcreate_v_bf16mf4x2(v0, v1); +} + +vbfloat16mf4x3_t test_vcreate_v_bf16mf4x3(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2) { + return __riscv_vcreate_v_bf16mf4x3(v0, v1, v2); +} + +vbfloat16mf4x4_t test_vcreate_v_bf16mf4x4(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, + vbfloat16mf4_t v3) { + return __riscv_vcreate_v_bf16mf4x4(v0, v1, v2, v3); +} + +vbfloat16mf4x5_t test_vcreate_v_bf16mf4x5(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4) { + return __riscv_vcreate_v_bf16mf4x5(v0, v1, v2, v3, v4); +} + +vbfloat16mf4x6_t test_vcreate_v_bf16mf4x6(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, + vbfloat16mf4_t v5) { + return __riscv_vcreate_v_bf16mf4x6(v0, v1, v2, v3, v4, v5); +} + +vbfloat16mf4x7_t test_vcreate_v_bf16mf4x7(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, + vbfloat16mf4_t v6) { + return __riscv_vcreate_v_bf16mf4x7(v0, v1, v2, v3, v4, v5, v6); +} + +vbfloat16mf4x8_t test_vcreate_v_bf16mf4x8(vbfloat16mf4_t v0, vbfloat16mf4_t v1, + 
vbfloat16mf4_t v2, vbfloat16mf4_t v3, + vbfloat16mf4_t v4, vbfloat16mf4_t v5, + vbfloat16mf4_t v6, + vbfloat16mf4_t v7) { + return __riscv_vcreate_v_bf16mf4x8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16mf2x2_t test_vcreate_v_bf16mf2x2(vbfloat16mf2_t v0, + vbfloat16mf2_t v1) { + return __riscv_vcreate_v_bf16mf2x2(v0, v1); +} + +vbfloat16mf2x3_t test_vcreate_v_bf16mf2x3(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2) { + return __riscv_vcreate_v_bf16mf2x3(v0, v1, v2); +} + +vbfloat16mf2x4_t test_vcreate_v_bf16mf2x4(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, + vbfloat16mf2_t v3) { + return __riscv_vcreate_v_bf16mf2x4(v0, v1, v2, v3); +} + +vbfloat16mf2x5_t test_vcreate_v_bf16mf2x5(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4) { + return __riscv_vcreate_v_bf16mf2x5(v0, v1, v2, v3, v4); +} + +vbfloat16mf2x6_t test_vcreate_v_bf16mf2x6(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, + vbfloat16mf2_t v5) { + return __riscv_vcreate_v_bf16mf2x6(v0, v1, v2, v3, v4, v5); +} + +vbfloat16mf2x7_t test_vcreate_v_bf16mf2x7(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, + vbfloat16mf2_t v6) { + return __riscv_vcreate_v_bf16mf2x7(v0, v1, v2, v3, v4, v5, v6); +} + +vbfloat16mf2x8_t test_vcreate_v_bf16mf2x8(vbfloat16mf2_t v0, vbfloat16mf2_t v1, + vbfloat16mf2_t v2, vbfloat16mf2_t v3, + vbfloat16mf2_t v4, vbfloat16mf2_t v5, + vbfloat16mf2_t v6, + vbfloat16mf2_t v7) { + return __riscv_vcreate_v_bf16mf2x8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16m1x2_t test_vcreate_v_bf16m1x2(vbfloat16m1_t v0, vbfloat16m1_t v1) { + return __riscv_vcreate_v_bf16m1x2(v0, v1); +} + +vbfloat16m1x3_t test_vcreate_v_bf16m1x3(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2) { + return __riscv_vcreate_v_bf16m1x3(v0, v1, v2); +} + +vbfloat16m1x4_t test_vcreate_v_bf16m1x4(vbfloat16m1_t v0, 
vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3) { + return __riscv_vcreate_v_bf16m1x4(v0, v1, v2, v3); +} + +vbfloat16m1x5_t test_vcreate_v_bf16m1x5(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4) { + return __riscv_vcreate_v_bf16m1x5(v0, v1, v2, v3, v4); +} + +vbfloat16m1x6_t test_vcreate_v_bf16m1x6(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5) { + return __riscv_vcreate_v_bf16m1x6(v0, v1, v2, v3, v4, v5); +} + +vbfloat16m1x7_t test_vcreate_v_bf16m1x7(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6) { + return __riscv_vcreate_v_bf16m1x7(v0, v1, v2, v3, v4, v5, v6); +} + +vbfloat16m1x8_t test_vcreate_v_bf16m1x8(vbfloat16m1_t v0, vbfloat16m1_t v1, + vbfloat16m1_t v2, vbfloat16m1_t v3, + vbfloat16m1_t v4, vbfloat16m1_t v5, + vbfloat16m1_t v6, vbfloat16m1_t v7) { + return __riscv_vcreate_v_bf16m1x8(v0, v1, v2, v3, v4, v5, v6, v7); +} + +vbfloat16m2x2_t test_vcreate_v_bf16m2x2(vbfloat16m2_t v0, vbfloat16m2_t v1) { + return __riscv_vcreate_v_bf16m2x2(v0, v1); +} + +vbfloat16m2x3_t test_vcreate_v_bf16m2x3(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2) { + return __riscv_vcreate_v_bf16m2x3(v0, v1, v2); +} + +vbfloat16m2x4_t test_vcreate_v_bf16m2x4(vbfloat16m2_t v0, vbfloat16m2_t v1, + vbfloat16m2_t v2, vbfloat16m2_t v3) { + return __riscv_vcreate_v_bf16m2x4(v0, v1, v2, v3); +} + +vbfloat16m4x2_t test_vcreate_v_bf16m4x2(vbfloat16m4_t v0, vbfloat16m4_t v1) { + return __riscv_vcreate_v_bf16m4x2(v0, v1); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vfncvtbf16.c b/auto-generated/bfloat16/llvm-api-tests/vfncvtbf16.c new file mode 100644 index 000000000..4a81a4e71 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vfncvtbf16.c @@ -0,0 +1,99 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature 
+zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4(vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4(vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2(vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2(vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1(vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1(vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2(vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4(vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_m(vbool64_t vm, vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_m(vm, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_m(vbool32_t vm, vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_m(vm, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_m(vm, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_m(vm, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_m(vm, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm(vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm(vfloat32m1_t vs2, size_t vl) { + return 
__riscv_vfncvtbf16_f_f_w_bf16mf2_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm(vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t +test_vfncvtbf16_f_f_w_bf16mf4_rm_m(vbool64_t vm, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_m(vbool32_t vm, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_m(vm, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vfwcvtbf16.c b/auto-generated/bfloat16/llvm-api-tests/vfwcvtbf16.c new file mode 100644 index 000000000..0572244e5 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vfwcvtbf16.c @@ -0,0 +1,54 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S 
-passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2(vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2(vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1(vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1(vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2(vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2(vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4(vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4(vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8(vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8(vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_m(vbool64_t vm, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_m(vm, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_m(vbool32_t vm, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_m(vm, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_m(vbool16_t vm, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_m(vm, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_m(vbool8_t vm, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_m(vm, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_m(vbool4_t vm, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_m(vm, vs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vfwmaccbf16.c b/auto-generated/bfloat16/llvm-api-tests/vfwmaccbf16.c new file mode 100644 index 000000000..c1ba47c29 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vfwmaccbf16.c @@ -0,0 +1,240 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature 
+experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + 
return __riscv_vfwmaccbf16_vv_f32mf2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_m(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_m(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); 
+} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vf_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_m(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vget.c 
b/auto-generated/bfloat16/llvm-api-tests/vget.c new file mode 100644 index 000000000..61473a4ea --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vget.c @@ -0,0 +1,147 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16m1_t test_vget_v_bf16m2_bf16m1(vbfloat16m2_t src, size_t index) { + return __riscv_vget_v_bf16m2_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m4_bf16m1(vbfloat16m4_t src, size_t index) { + return __riscv_vget_v_bf16m4_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m8_bf16m1(vbfloat16m8_t src, size_t index) { + return __riscv_vget_v_bf16m8_bf16m1(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m4_bf16m2(vbfloat16m4_t src, size_t index) { + return __riscv_vget_v_bf16m4_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m8_bf16m2(vbfloat16m8_t src, size_t index) { + return __riscv_vget_v_bf16m8_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m8_bf16m4(vbfloat16m8_t src, size_t index) { + return __riscv_vget_v_bf16m8_bf16m4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x2_bf16mf4(vbfloat16mf4x2_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x2_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x3_bf16mf4(vbfloat16mf4x3_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x3_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x4_bf16mf4(vbfloat16mf4x4_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x4_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x5_bf16mf4(vbfloat16mf4x5_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x5_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x6_bf16mf4(vbfloat16mf4x6_t src, + size_t 
index) { + return __riscv_vget_v_bf16mf4x6_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x7_bf16mf4(vbfloat16mf4x7_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x7_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x8_bf16mf4(vbfloat16mf4x8_t src, + size_t index) { + return __riscv_vget_v_bf16mf4x8_bf16mf4(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x2_bf16mf2(vbfloat16mf2x2_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x2_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x3_bf16mf2(vbfloat16mf2x3_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x3_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x4_bf16mf2(vbfloat16mf2x4_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x4_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x5_bf16mf2(vbfloat16mf2x5_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x5_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x6_bf16mf2(vbfloat16mf2x6_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x6_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x7_bf16mf2(vbfloat16mf2x7_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x7_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x8_bf16mf2(vbfloat16mf2x8_t src, + size_t index) { + return __riscv_vget_v_bf16mf2x8_bf16mf2(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x2_bf16m1(vbfloat16m1x2_t src, size_t index) { + return __riscv_vget_v_bf16m1x2_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x3_bf16m1(vbfloat16m1x3_t src, size_t index) { + return __riscv_vget_v_bf16m1x3_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x4_bf16m1(vbfloat16m1x4_t src, size_t index) { + return __riscv_vget_v_bf16m1x4_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x5_bf16m1(vbfloat16m1x5_t src, size_t index) { + return __riscv_vget_v_bf16m1x5_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x6_bf16m1(vbfloat16m1x6_t src, size_t index) { + return 
__riscv_vget_v_bf16m1x6_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x7_bf16m1(vbfloat16m1x7_t src, size_t index) { + return __riscv_vget_v_bf16m1x7_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x8_bf16m1(vbfloat16m1x8_t src, size_t index) { + return __riscv_vget_v_bf16m1x8_bf16m1(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x2_bf16m2(vbfloat16m2x2_t src, size_t index) { + return __riscv_vget_v_bf16m2x2_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x3_bf16m2(vbfloat16m2x3_t src, size_t index) { + return __riscv_vget_v_bf16m2x3_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x4_bf16m2(vbfloat16m2x4_t src, size_t index) { + return __riscv_vget_v_bf16m2x4_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m4x2_bf16m4(vbfloat16m4x2_t src, size_t index) { + return __riscv_vget_v_bf16m4x2_bf16m4(src, 0); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vle16.c b/auto-generated/bfloat16/llvm-api-tests/vle16.c new file mode 100644 index 000000000..db5ed90dc --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vle16.c @@ -0,0 +1,60 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4(rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2(rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1(rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2(rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4(const __bf16 
*rs1, size_t vl) { + return __riscv_vle16_v_bf16m4(rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8(const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8(rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16mf4_m(vm, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16mf2_m(vm, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16m1_m(vm, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_m(vm, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_m(vm, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vle16ff.c b/auto-generated/bfloat16/llvm-api-tests/vle16ff.c new file mode 100644 index 000000000..c1bd752af --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vle16ff.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf4(rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf2(rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1(const __bf16 
*rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m1(rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m2(rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m4(rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m8(rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf4_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m1_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m4_m(vm, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m8_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlmul_ext_v.c b/auto-generated/bfloat16/llvm-api-tests/vlmul_ext_v.c new file mode 100644 index 000000000..a9cc1c367 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlmul_ext_v.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// 
RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf2_t test_vlmul_ext_v_bf16mf4_bf16mf2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf4_bf16m1(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf4_bf16m2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf4_bf16m4(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf4_bf16m8(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_v_bf16mf4_bf16m8(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf2_bf16m1(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf2_bf16m2(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf2_bf16m4(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf2_bf16m8(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_v_bf16mf2_bf16m8(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16m1_bf16m2(vbfloat16m1_t value) { + return __riscv_vlmul_ext_v_bf16m1_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m1_bf16m4(vbfloat16m1_t value) { + return __riscv_vlmul_ext_v_bf16m1_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m1_bf16m8(vbfloat16m1_t value) { + return __riscv_vlmul_ext_v_bf16m1_bf16m8(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m2_bf16m4(vbfloat16m2_t value) { + return __riscv_vlmul_ext_v_bf16m2_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m2_bf16m8(vbfloat16m2_t value) { + return __riscv_vlmul_ext_v_bf16m2_bf16m8(value); +} + +vbfloat16m8_t 
test_vlmul_ext_v_bf16m4_bf16m8(vbfloat16m4_t value) { + return __riscv_vlmul_ext_v_bf16m4_bf16m8(value); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlmul_trunc_v.c b/auto-generated/bfloat16/llvm-api-tests/vlmul_trunc_v.c new file mode 100644 index 000000000..9bdca7bca --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlmul_trunc_v.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vlmul_trunc_v_bf16mf2_bf16mf4(vbfloat16mf2_t value) { + return __riscv_vlmul_trunc_v_bf16mf2_bf16mf4(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m1_bf16mf4(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_v_bf16m1_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m1_bf16mf2(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_v_bf16m1_bf16mf2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m2_bf16mf4(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_v_bf16m2_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m2_bf16mf2(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_v_bf16m2_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m2_bf16m1(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_v_bf16m2_bf16m1(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m4_bf16mf4(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m4_bf16mf2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m4_bf16m1(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16m1(value); +} + +vbfloat16m2_t 
test_vlmul_trunc_v_bf16m4_bf16m2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_v_bf16m4_bf16m2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m8_bf16mf4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m8_bf16mf2(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m8_bf16m1(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_trunc_v_bf16m8_bf16m2(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_trunc_v_bf16m8_bf16m4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_v_bf16m8_bf16m4(value); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxei16.c new file mode 100644 index 000000000..a8e7d4dc5 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vloxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vloxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf4(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf2(rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m1(rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m2(rs1, rs2, vl); +} + 
+vbfloat16m4_t test_vloxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m4(rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m8(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m1_m(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m4_m(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxseg2ei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxseg2ei16.c new file mode 100644 index 000000000..31478d5a5 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vloxseg2ei16.c @@ -0,0 +1,61 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t 
test_vloxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxseg3ei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxseg3ei16.c new file mode 100644 index 000000000..a0e1c2eb3 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vloxseg3ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxseg4ei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxseg4ei16.c new file mode 100644 index 000000000..0c3b9c66f --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vloxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: 
riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxseg5ei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxseg5ei16.c new file mode 100644 index 000000000..99b75edcd --- /dev/null +++ 
b/auto-generated/bfloat16/llvm-api-tests/vloxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxseg6ei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxseg6ei16.c new file mode 100644 index 000000000..d700d64e2 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vloxseg6ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: 
-target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxseg7ei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxseg7ei16.c new file mode 100644 index 000000000..218746d8d --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vloxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return 
__riscv_vloxseg7ei16_v_bf16mf4x7(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vloxseg8ei16.c b/auto-generated/bfloat16/llvm-api-tests/vloxseg8ei16.c new file mode 100644 index 000000000..1e4fa305e --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vloxseg8ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + 
size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlse16.c b/auto-generated/bfloat16/llvm-api-tests/vlse16.c new file mode 100644 index 000000000..f9cb9fb39 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlse16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf4(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf2(rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m1(rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m2(rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m4(rs1, rs2, vl); +} + +vbfloat16m8_t 
test_vlse16_v_bf16m8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m8(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m1_m(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m4_m(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg2e16.c b/auto-generated/bfloat16/llvm-api-tests/vlseg2e16.c new file mode 100644 index 000000000..98770a402 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg2e16.c @@ -0,0 +1,54 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2(rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2(rs1, vl); +} + 
+vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2(rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2(rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2(rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_m(vm, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_m(vm, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_m(vm, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_m(vm, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg2e16ff.c b/auto-generated/bfloat16/llvm-api-tests/vlseg2e16ff.c new file mode 100644 index 000000000..72f3d77cd --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg2e16ff.c @@ -0,0 +1,59 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2(rs1, new_vl, vl); +} + +vbfloat16mf2x2_t 
test_vlseg2e16ff_v_bf16mf2x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2(rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2(rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2(rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2(rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_m(vm, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg3e16.c b/auto-generated/bfloat16/llvm-api-tests/vlseg3e16.c new file mode 100644 index 000000000..e7f6fd2d1 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg3e16.c @@ -0,0 +1,45 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: 
-target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3(rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3(rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3(rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3(rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_m(vm, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_m(vm, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_m(vm, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg3e16ff.c b/auto-generated/bfloat16/llvm-api-tests/vlseg3e16ff.c new file mode 100644 index 000000000..a71b248e6 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg3e16ff.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t 
test_vlseg3e16ff_v_bf16mf4x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3(rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3(rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3(rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3(rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg4e16.c b/auto-generated/bfloat16/llvm-api-tests/vlseg4e16.c new file mode 100644 index 000000000..597738d92 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg4e16.c @@ -0,0 +1,45 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + 
+vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4(rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4(rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4(rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4(rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_m(vm, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_m(vm, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_m(vm, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg4e16ff.c b/auto-generated/bfloat16/llvm-api-tests/vlseg4e16ff.c new file mode 100644 index 000000000..1531d9221 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg4e16ff.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4(rs1, new_vl, vl); +} + +vbfloat16mf2x4_t 
test_vlseg4e16ff_v_bf16mf2x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4(rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4(rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4(rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_m(vm, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg5e16.c b/auto-generated/bfloat16/llvm-api-tests/vlseg5e16.c new file mode 100644 index 000000000..1c894ce64 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg5e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5(rs1, vl); +} + +vbfloat16mf2x5_t 
test_vlseg5e16_v_bf16mf2x5(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5(rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5(rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_m(vm, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_m(vm, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg5e16ff.c b/auto-generated/bfloat16/llvm-api-tests/vlseg5e16ff.c new file mode 100644 index 000000000..3672f2061 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg5e16ff.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5(rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5(rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5(rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg5e16ff_v_bf16mf4x5_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg6e16.c b/auto-generated/bfloat16/llvm-api-tests/vlseg6e16.c new file mode 100644 index 000000000..554ec2d93 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg6e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6(rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6(rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6(rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_m(vm, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_m(vm, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg6e16ff.c 
b/auto-generated/bfloat16/llvm-api-tests/vlseg6e16ff.c new file mode 100644 index 000000000..419c4ab8e --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg6e16ff.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6(rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6(rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6(rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg7e16.c b/auto-generated/bfloat16/llvm-api-tests/vlseg7e16.c new file mode 100644 index 000000000..42ffe0707 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg7e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: 
-target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7(rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7(rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7(rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_m(vm, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_m(vm, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg7e16ff.c b/auto-generated/bfloat16/llvm-api-tests/vlseg7e16ff.c new file mode 100644 index 000000000..8926f6553 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg7e16ff.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7(rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7(const 
__bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7(rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7(rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_m(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg8e16.c b/auto-generated/bfloat16/llvm-api-tests/vlseg8e16.c new file mode 100644 index 000000000..fa7278cb6 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg8e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8(rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8(rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8(const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8(rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_m(vm, rs1, 
vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_m(vm, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_m(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlseg8e16ff.c b/auto-generated/bfloat16/llvm-api-tests/vlseg8e16ff.c new file mode 100644 index 000000000..61accef57 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlseg8e16ff.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8(rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8(rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8(const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8(rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_m(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_m(vm, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_m(vm, rs1, new_vl, vl); +} 
diff --git a/auto-generated/bfloat16/llvm-api-tests/vlsseg2e16.c b/auto-generated/bfloat16/llvm-api-tests/vlsseg2e16.c new file mode 100644 index 000000000..cd13c384c --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlsseg2e16.c @@ -0,0 +1,59 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t 
test_vlsseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlsseg3e16.c b/auto-generated/bfloat16/llvm-api-tests/vlsseg3e16.c new file mode 100644 index 000000000..68f6d7be4 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlsseg3e16.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t 
test_vlsseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlsseg4e16.c b/auto-generated/bfloat16/llvm-api-tests/vlsseg4e16.c new file mode 100644 index 000000000..ae5296c76 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlsseg4e16.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t 
test_vlsseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlsseg5e16.c b/auto-generated/bfloat16/llvm-api-tests/vlsseg5e16.c new file mode 100644 index 000000000..10f84f99a --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlsseg5e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/llvm-api-tests/vlsseg6e16.c b/auto-generated/bfloat16/llvm-api-tests/vlsseg6e16.c new file mode 100644 index 000000000..c39c63830 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlsseg6e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlsseg7e16.c b/auto-generated/bfloat16/llvm-api-tests/vlsseg7e16.c new file mode 100644 index 000000000..943c4a19e --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlsseg7e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature 
+zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vlsseg8e16.c b/auto-generated/bfloat16/llvm-api-tests/vlsseg8e16.c new file mode 100644 index 000000000..cda03cedf --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vlsseg8e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8(const __bf16 
*rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8(const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vluxei16.c b/auto-generated/bfloat16/llvm-api-tests/vluxei16.c new file mode 100644 index 000000000..0bacd5b35 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vluxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf4(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf2(rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return 
__riscv_vluxei16_v_bf16m1(rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m2(rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m4(rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m8(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m1_m(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m4_m(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vluxseg2ei16.c b/auto-generated/bfloat16/llvm-api-tests/vluxseg2ei16.c new file mode 100644 index 000000000..c54b55e80 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxseg2ei16.c @@ -0,0 +1,61 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma 
-disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vluxseg3ei16.c b/auto-generated/bfloat16/llvm-api-tests/vluxseg3ei16.c new file mode 100644 index 
000000000..146f46088 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxseg3ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vluxseg4ei16.c 
b/auto-generated/bfloat16/llvm-api-tests/vluxseg4ei16.c new file mode 100644 index 000000000..3bb62589b --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/llvm-api-tests/vluxseg5ei16.c b/auto-generated/bfloat16/llvm-api-tests/vluxseg5ei16.c new file mode 100644 index 000000000..42b121802 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vluxseg6ei16.c b/auto-generated/bfloat16/llvm-api-tests/vluxseg6ei16.c new file mode 100644 index 000000000..3059d8cad --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxseg6ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vluxseg7ei16.c b/auto-generated/bfloat16/llvm-api-tests/vluxseg7ei16.c new file mode 100644 index 000000000..9ec7135aa --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: 
FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vluxseg8ei16.c b/auto-generated/bfloat16/llvm-api-tests/vluxseg8ei16.c new file mode 100644 index 000000000..5fc52ed9a --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vluxseg8ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, 
size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vreinterpret.c b/auto-generated/bfloat16/llvm-api-tests/vreinterpret.c new file mode 100644 index 000000000..b9143958e --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vreinterpret.c @@ -0,0 +1,105 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vreinterpret_v_i16mf4_bf16mf4(vint16mf4_t src) { + return __riscv_vreinterpret_v_i16mf4_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_i16mf2_bf16mf2(vint16mf2_t src) { + return __riscv_vreinterpret_v_i16mf2_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_i16m1_bf16m1(vint16m1_t src) { + return __riscv_vreinterpret_v_i16m1_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_i16m2_bf16m2(vint16m2_t src) { + return __riscv_vreinterpret_v_i16m2_bf16m2(src); +} + +vbfloat16m4_t 
test_vreinterpret_v_i16m4_bf16m4(vint16m4_t src) { + return __riscv_vreinterpret_v_i16m4_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_i16m8_bf16m8(vint16m8_t src) { + return __riscv_vreinterpret_v_i16m8_bf16m8(src); +} + +vbfloat16mf4_t test_vreinterpret_v_u16mf4_bf16mf4(vuint16mf4_t src) { + return __riscv_vreinterpret_v_u16mf4_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_u16mf2_bf16mf2(vuint16mf2_t src) { + return __riscv_vreinterpret_v_u16mf2_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_u16m1_bf16m1(vuint16m1_t src) { + return __riscv_vreinterpret_v_u16m1_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_u16m2_bf16m2(vuint16m2_t src) { + return __riscv_vreinterpret_v_u16m2_bf16m2(src); +} + +vbfloat16m4_t test_vreinterpret_v_u16m4_bf16m4(vuint16m4_t src) { + return __riscv_vreinterpret_v_u16m4_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_u16m8_bf16m8(vuint16m8_t src) { + return __riscv_vreinterpret_v_u16m8_bf16m8(src); +} + +vint16mf4_t test_vreinterpret_v_bf16mf4_i16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_v_bf16mf4_i16mf4(src); +} + +vint16mf2_t test_vreinterpret_v_bf16mf2_i16mf2(vbfloat16mf2_t src) { + return __riscv_vreinterpret_v_bf16mf2_i16mf2(src); +} + +vint16m1_t test_vreinterpret_v_bf16m1_i16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_v_bf16m1_i16m1(src); +} + +vint16m2_t test_vreinterpret_v_bf16m2_i16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_v_bf16m2_i16m2(src); +} + +vint16m4_t test_vreinterpret_v_bf16m4_i16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_v_bf16m4_i16m4(src); +} + +vint16m8_t test_vreinterpret_v_bf16m8_i16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_v_bf16m8_i16m8(src); +} + +vuint16mf4_t test_vreinterpret_v_bf16mf4_u16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_v_bf16mf4_u16mf4(src); +} + +vuint16mf2_t test_vreinterpret_v_bf16mf2_u16mf2(vbfloat16mf2_t src) { + return __riscv_vreinterpret_v_bf16mf2_u16mf2(src); +} + +vuint16m1_t 
test_vreinterpret_v_bf16m1_u16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_v_bf16m1_u16m1(src); +} + +vuint16m2_t test_vreinterpret_v_bf16m2_u16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_v_bf16m2_u16m2(src); +} + +vuint16m4_t test_vreinterpret_v_bf16m4_u16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_v_bf16m4_u16m4(src); +} + +vuint16m8_t test_vreinterpret_v_bf16m8_u16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_v_bf16m8_u16m8(src); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vse16.c b/auto-generated/bfloat16/llvm-api-tests/vse16.c new file mode 100644 index 000000000..1e38c43cc --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vse16.c @@ -0,0 +1,63 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vse16_v_bf16mf4(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vse16_v_bf16mf4(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf2(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vse16_v_bf16mf2(rs1, vs3, vl); +} + +void test_vse16_v_bf16m1(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m1(rs1, vs3, vl); +} + +void test_vse16_v_bf16m2(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m2(rs1, vs3, vl); +} + +void test_vse16_v_bf16m4(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m4(rs1, vs3, vl); +} + +void test_vse16_v_bf16m8(__bf16 *rs1, vbfloat16m8_t vs3, size_t vl) { + return __riscv_vse16_v_bf16m8(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16mf4_m(vm, rs1, 
vs3, vl); +} + +void test_vse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16mf2_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m1_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m2_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m4_m(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vse16_v_bf16m8_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vset.c b/auto-generated/bfloat16/llvm-api-tests/vset.c new file mode 100644 index 000000000..b38841089 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vset.c @@ -0,0 +1,178 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16m2_t test_vset_v_bf16m1_bf16m2(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m2(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m1_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m4(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m2_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m4(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m1_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value) { + return 
__riscv_vset_v_bf16m1_bf16m8(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m2_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m8(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m4_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset_v_bf16m4_bf16m8(dest, 0, value); +} + +vbfloat16mf4x2_t test_vset_v_bf16mf4_bf16mf4x2(vbfloat16mf4x2_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x2(dest, 0, value); +} + +vbfloat16mf4x3_t test_vset_v_bf16mf4_bf16mf4x3(vbfloat16mf4x3_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x3(dest, 0, value); +} + +vbfloat16mf4x4_t test_vset_v_bf16mf4_bf16mf4x4(vbfloat16mf4x4_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x4(dest, 0, value); +} + +vbfloat16mf4x5_t test_vset_v_bf16mf4_bf16mf4x5(vbfloat16mf4x5_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x5(dest, 0, value); +} + +vbfloat16mf4x6_t test_vset_v_bf16mf4_bf16mf4x6(vbfloat16mf4x6_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x6(dest, 0, value); +} + +vbfloat16mf4x7_t test_vset_v_bf16mf4_bf16mf4x7(vbfloat16mf4x7_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x7(dest, 0, value); +} + +vbfloat16mf4x8_t test_vset_v_bf16mf4_bf16mf4x8(vbfloat16mf4x8_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset_v_bf16mf4_bf16mf4x8(dest, 0, value); +} + +vbfloat16mf2x2_t test_vset_v_bf16mf2_bf16mf2x2(vbfloat16mf2x2_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x2(dest, 0, value); +} + +vbfloat16mf2x3_t test_vset_v_bf16mf2_bf16mf2x3(vbfloat16mf2x3_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x3(dest, 0, value); +} + +vbfloat16mf2x4_t 
test_vset_v_bf16mf2_bf16mf2x4(vbfloat16mf2x4_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x4(dest, 0, value); +} + +vbfloat16mf2x5_t test_vset_v_bf16mf2_bf16mf2x5(vbfloat16mf2x5_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x5(dest, 0, value); +} + +vbfloat16mf2x6_t test_vset_v_bf16mf2_bf16mf2x6(vbfloat16mf2x6_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x6(dest, 0, value); +} + +vbfloat16mf2x7_t test_vset_v_bf16mf2_bf16mf2x7(vbfloat16mf2x7_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x7(dest, 0, value); +} + +vbfloat16mf2x8_t test_vset_v_bf16mf2_bf16mf2x8(vbfloat16mf2x8_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset_v_bf16mf2_bf16mf2x8(dest, 0, value); +} + +vbfloat16m1x2_t test_vset_v_bf16m1_bf16m1x2(vbfloat16m1x2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x2(dest, 0, value); +} + +vbfloat16m1x3_t test_vset_v_bf16m1_bf16m1x3(vbfloat16m1x3_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x3(dest, 0, value); +} + +vbfloat16m1x4_t test_vset_v_bf16m1_bf16m1x4(vbfloat16m1x4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x4(dest, 0, value); +} + +vbfloat16m1x5_t test_vset_v_bf16m1_bf16m1x5(vbfloat16m1x5_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x5(dest, 0, value); +} + +vbfloat16m1x6_t test_vset_v_bf16m1_bf16m1x6(vbfloat16m1x6_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x6(dest, 0, value); +} + +vbfloat16m1x7_t test_vset_v_bf16m1_bf16m1x7(vbfloat16m1x7_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset_v_bf16m1_bf16m1x7(dest, 0, value); +} + +vbfloat16m1x8_t test_vset_v_bf16m1_bf16m1x8(vbfloat16m1x8_t dest, size_t index, + vbfloat16m1_t value) { + return 
__riscv_vset_v_bf16m1_bf16m1x8(dest, 0, value); +} + +vbfloat16m2x2_t test_vset_v_bf16m2_bf16m2x2(vbfloat16m2x2_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m2x2(dest, 0, value); +} + +vbfloat16m2x3_t test_vset_v_bf16m2_bf16m2x3(vbfloat16m2x3_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m2x3(dest, 0, value); +} + +vbfloat16m2x4_t test_vset_v_bf16m2_bf16m2x4(vbfloat16m2x4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset_v_bf16m2_bf16m2x4(dest, 0, value); +} + +vbfloat16m4x2_t test_vset_v_bf16m4_bf16m4x2(vbfloat16m4x2_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset_v_bf16m4_bf16m4x2(dest, 0, value); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxei16.c b/auto-generated/bfloat16/llvm-api-tests/vsoxei16.c new file mode 100644 index 000000000..83f1fd347 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsoxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16mf4(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16mf2(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m1(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m2(rs1, rs2, vs3, vl); +} + +void 
test_vsoxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m4(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsoxei16_v_bf16m8(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsoxei16_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxseg2ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsoxseg2ei16.c new file mode 100644 index 000000000..c15bada5f --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsoxseg2ei16.c @@ -0,0 +1,61 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 
%s + +#include + +void test_vsoxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxseg3ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsoxseg3ei16.c new file mode 100644 index 000000000..65fcbd53e --- /dev/null 
+++ b/auto-generated/bfloat16/llvm-api-tests/vsoxseg3ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxseg4ei16.c 
b/auto-generated/bfloat16/llvm-api-tests/vsoxseg4ei16.c new file mode 100644 index 000000000..a19267e36 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsoxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return 
__riscv_vsoxseg4ei16_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxseg5ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsoxseg5ei16.c new file mode 100644 index 000000000..4ed520162 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsoxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxseg6ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsoxseg6ei16.c new file mode 100644 index 000000000..c7f6d2afb --- /dev/null +++ 
b/auto-generated/bfloat16/llvm-api-tests/vsoxseg6ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxseg7ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsoxseg7ei16.c new file mode 100644 index 000000000..1546a88f2 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsoxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: 
-target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsoxseg8ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsoxseg8ei16.c new file mode 100644 index 000000000..c507e28db --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsoxseg8ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void 
test_vsoxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsse16.c b/auto-generated/bfloat16/llvm-api-tests/vsse16.c new file mode 100644 index 000000000..a066460f5 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsse16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsse16_v_bf16mf4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16mf4(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return 
__riscv_vsse16_v_bf16mf2(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m1(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m2(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m4(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsse16_v_bf16m8(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsse16_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsseg2e16.c b/auto-generated/bfloat16/llvm-api-tests/vsseg2e16.c new file mode 100644 index 000000000..2888efb67 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsseg2e16.c @@ -0,0 +1,54 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v 
-target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg2e16_v_bf16mf4x2(__bf16 *rs1, vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf4x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf2x2(__bf16 *rs1, vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf2x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m1x2(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16m1x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2(__bf16 *rs1, vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16m2x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16m4x2(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf4x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16_v_bf16mf2x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16_v_bf16m1x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16_v_bf16m2x2_m(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16_v_bf16m4x2_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsseg3e16.c b/auto-generated/bfloat16/llvm-api-tests/vsseg3e16.c new file mode 100644 index 000000000..dffee040e --- /dev/null +++ 
b/auto-generated/bfloat16/llvm-api-tests/vsseg3e16.c @@ -0,0 +1,45 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg3e16_v_bf16mf4x3(__bf16 *rs1, vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf4x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3(__bf16 *rs1, vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf2x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3(__bf16 *rs1, vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16m1x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16m2x3(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf4x3_m(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16_v_bf16mf2x3_m(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16_v_bf16m1x3_m(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16_v_bf16m2x3_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsseg4e16.c b/auto-generated/bfloat16/llvm-api-tests/vsseg4e16.c new file mode 100644 index 000000000..87e8309e8 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsseg4e16.c @@ -0,0 +1,45 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg4e16_v_bf16mf4x4(__bf16 *rs1, vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf4x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4(__bf16 *rs1, vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf2x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16m1x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m2x4(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16m2x4(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf4x4_m(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16_v_bf16mf2x4_m(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16_v_bf16m1x4_m(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16_v_bf16m2x4_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsseg5e16.c b/auto-generated/bfloat16/llvm-api-tests/vsseg5e16.c new file mode 100644 index 000000000..2bcd2d84c --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsseg5e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// 
RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg5e16_v_bf16mf4x5(__bf16 *rs1, vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf4x5(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5(__bf16 *rs1, vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf2x5(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16m1x5(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16m1x5(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf4x5_m(vm, rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16_v_bf16mf2x5_m(vm, rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vsseg5e16_v_bf16m1x5_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsseg6e16.c b/auto-generated/bfloat16/llvm-api-tests/vsseg6e16.c new file mode 100644 index 000000000..4520fc7eb --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsseg6e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg6e16_v_bf16mf4x6(__bf16 *rs1, vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16mf4x6(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6(__bf16 *rs1, vbfloat16mf2x6_t vs3, size_t vl) { + return 
__riscv_vsseg6e16_v_bf16mf2x6(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16m1x6(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16mf4x6_m(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsseg6e16_v_bf16mf2x6_m(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vsseg6e16_v_bf16m1x6_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsseg7e16.c b/auto-generated/bfloat16/llvm-api-tests/vsseg7e16.c new file mode 100644 index 000000000..293726b5e --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsseg7e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg7e16_v_bf16mf4x7(__bf16 *rs1, vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16mf4x7(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7(__bf16 *rs1, vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16mf2x7(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16m1x7(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16_v_bf16mf4x7_m(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x7_t vs3, 
size_t vl) { + return __riscv_vsseg7e16_v_bf16mf2x7_m(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vsseg7e16_v_bf16m1x7_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsseg8e16.c b/auto-generated/bfloat16/llvm-api-tests/vsseg8e16.c new file mode 100644 index 000000000..7245244d8 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsseg8e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg8e16_v_bf16mf4x8(__bf16 *rs1, vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf4x8(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8(__bf16 *rs1, vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf2x8(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16m1x8(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf4x8_m(vm, rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16_v_bf16mf2x8_m(vm, rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vsseg8e16_v_bf16m1x8_m(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vssseg2e16.c b/auto-generated/bfloat16/llvm-api-tests/vssseg2e16.c new file mode 100644 index 000000000..763b20621 --- /dev/null +++ 
b/auto-generated/bfloat16/llvm-api-tests/vssseg2e16.c @@ -0,0 +1,59 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg2e16_v_bf16mf4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf4x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf2x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16_v_bf16m1x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16_v_bf16m2x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16_v_bf16m4x2(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf4x2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16mf2x2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16m1x2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16m2x2_m(vm, rs1, 
rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16_v_bf16m4x2_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vssseg3e16.c b/auto-generated/bfloat16/llvm-api-tests/vssseg3e16.c new file mode 100644 index 000000000..fb8ff1d5b --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vssseg3e16.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg3e16_v_bf16mf4x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf4x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf2x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16_v_bf16m1x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16_v_bf16m2x3(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf4x3_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16mf2x3_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl) { + 
return __riscv_vssseg3e16_v_bf16m1x3_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16_v_bf16m2x3_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vssseg4e16.c b/auto-generated/bfloat16/llvm-api-tests/vssseg4e16.c new file mode 100644 index 000000000..f5d97c2cc --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vssseg4e16.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg4e16_v_bf16mf4x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf4x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf2x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16_v_bf16m1x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16_v_bf16m2x4(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf4x4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16mf2x4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t 
rs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16m1x4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16_v_bf16m2x4_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vssseg5e16.c b/auto-generated/bfloat16/llvm-api-tests/vssseg5e16.c new file mode 100644 index 000000000..9c4cef9d2 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vssseg5e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg5e16_v_bf16mf4x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf4x5(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf2x5(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vssseg5e16_v_bf16m1x5(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf4x5_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16mf2x5_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vssseg5e16_v_bf16m1x5_m(vm, rs1, rs2, vs3, vl); +} diff 
--git a/auto-generated/bfloat16/llvm-api-tests/vssseg6e16.c b/auto-generated/bfloat16/llvm-api-tests/vssseg6e16.c new file mode 100644 index 000000000..d0c431508 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vssseg6e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg6e16_v_bf16mf4x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf4x6(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf2x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf2x6(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vssseg6e16_v_bf16m1x6(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf4x6_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16mf2x6_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vssseg6e16_v_bf16m1x6_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vssseg7e16.c b/auto-generated/bfloat16/llvm-api-tests/vssseg7e16.c new file mode 100644 index 000000000..7a1f763b2 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vssseg7e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg7e16_v_bf16mf4x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf4x7(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf2x7(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16m1x7(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vssseg7e16_v_bf16m1x7(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf4x7_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16mf2x7_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vssseg7e16_v_bf16m1x7_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vssseg8e16.c b/auto-generated/bfloat16/llvm-api-tests/vssseg8e16.c new file mode 100644 index 000000000..ee2b52988 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vssseg8e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: 
FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg8e16_v_bf16mf4x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf4x8(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf2x8(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vssseg8e16_v_bf16m1x8(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf4x8_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16mf2x8_m(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vssseg8e16_v_bf16m1x8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsuxei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxei16.c new file mode 100644 index 000000000..67212ebfb --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16mf4(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { 
+ return __riscv_vsuxei16_v_bf16mf2(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m1(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m2(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m4(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsuxei16_v_bf16m8(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsuxei16_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsuxseg2ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxseg2ei16.c new file mode 100644 index 000000000..43721fe78 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxseg2ei16.c @@ -0,0 +1,61 @@ +// 
REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m2x2_m(vm, rs1, vs2, vs3, 
vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsuxseg3ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxseg3ei16.c new file mode 100644 index 000000000..840e504bf --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxseg3ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, 
vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsuxseg4ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxseg4ei16.c new file mode 100644 index 000000000..ad768d4ab --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return 
__riscv_vsuxseg4ei16_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsuxseg5ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxseg5ei16.c new file mode 100644 index 000000000..3cdf1d112 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} 
+ +void test_vsuxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsuxseg6ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxseg6ei16.c new file mode 100644 index 000000000..32d69a6a3 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxseg6ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git 
a/auto-generated/bfloat16/llvm-api-tests/vsuxseg7ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxseg7ei16.c new file mode 100644 index 000000000..a041297a8 --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vsuxseg8ei16.c b/auto-generated/bfloat16/llvm-api-tests/vsuxseg8ei16.c new file mode 100644 index 000000000..61f23e3aa --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vsuxseg8ei16.c @@ -0,0 +1,41 @@ 
+// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-api-tests/vundefined.c b/auto-generated/bfloat16/llvm-api-tests/vundefined.c new file mode 100644 index 000000000..0683dd94e --- /dev/null +++ b/auto-generated/bfloat16/llvm-api-tests/vundefined.c @@ -0,0 +1,125 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature 
+experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vundefined_bf16mf4() { + return __riscv_vundefined_bf16mf4(); +} + +vbfloat16mf2_t test_vundefined_bf16mf2() { + return __riscv_vundefined_bf16mf2(); +} + +vbfloat16m1_t test_vundefined_bf16m1() { return __riscv_vundefined_bf16m1(); } + +vbfloat16m2_t test_vundefined_bf16m2() { return __riscv_vundefined_bf16m2(); } + +vbfloat16m4_t test_vundefined_bf16m4() { return __riscv_vundefined_bf16m4(); } + +vbfloat16m8_t test_vundefined_bf16m8() { return __riscv_vundefined_bf16m8(); } + +vbfloat16mf4x2_t test_vundefined_bf16mf4x2() { + return __riscv_vundefined_bf16mf4x2(); +} + +vbfloat16mf4x3_t test_vundefined_bf16mf4x3() { + return __riscv_vundefined_bf16mf4x3(); +} + +vbfloat16mf4x4_t test_vundefined_bf16mf4x4() { + return __riscv_vundefined_bf16mf4x4(); +} + +vbfloat16mf4x5_t test_vundefined_bf16mf4x5() { + return __riscv_vundefined_bf16mf4x5(); +} + +vbfloat16mf4x6_t test_vundefined_bf16mf4x6() { + return __riscv_vundefined_bf16mf4x6(); +} + +vbfloat16mf4x7_t test_vundefined_bf16mf4x7() { + return __riscv_vundefined_bf16mf4x7(); +} + +vbfloat16mf4x8_t test_vundefined_bf16mf4x8() { + return __riscv_vundefined_bf16mf4x8(); +} + +vbfloat16mf2x2_t test_vundefined_bf16mf2x2() { + return __riscv_vundefined_bf16mf2x2(); +} + +vbfloat16mf2x3_t test_vundefined_bf16mf2x3() { + return __riscv_vundefined_bf16mf2x3(); +} + +vbfloat16mf2x4_t test_vundefined_bf16mf2x4() { + return __riscv_vundefined_bf16mf2x4(); +} + +vbfloat16mf2x5_t test_vundefined_bf16mf2x5() { + return __riscv_vundefined_bf16mf2x5(); +} + +vbfloat16mf2x6_t test_vundefined_bf16mf2x6() { + return __riscv_vundefined_bf16mf2x6(); +} + +vbfloat16mf2x7_t test_vundefined_bf16mf2x7() { + return __riscv_vundefined_bf16mf2x7(); +} + +vbfloat16mf2x8_t test_vundefined_bf16mf2x8() { + return __riscv_vundefined_bf16mf2x8(); +} + 
+vbfloat16m1x2_t test_vundefined_bf16m1x2() { + return __riscv_vundefined_bf16m1x2(); +} + +vbfloat16m1x3_t test_vundefined_bf16m1x3() { + return __riscv_vundefined_bf16m1x3(); +} + +vbfloat16m1x4_t test_vundefined_bf16m1x4() { + return __riscv_vundefined_bf16m1x4(); +} + +vbfloat16m1x5_t test_vundefined_bf16m1x5() { + return __riscv_vundefined_bf16m1x5(); +} + +vbfloat16m1x6_t test_vundefined_bf16m1x6() { + return __riscv_vundefined_bf16m1x6(); +} + +vbfloat16m1x7_t test_vundefined_bf16m1x7() { + return __riscv_vundefined_bf16m1x7(); +} + +vbfloat16m1x8_t test_vundefined_bf16m1x8() { + return __riscv_vundefined_bf16m1x8(); +} + +vbfloat16m2x2_t test_vundefined_bf16m2x2() { + return __riscv_vundefined_bf16m2x2(); +} + +vbfloat16m2x3_t test_vundefined_bf16m2x3() { + return __riscv_vundefined_bf16m2x3(); +} + +vbfloat16m2x4_t test_vundefined_bf16m2x4() { + return __riscv_vundefined_bf16m2x4(); +} + +vbfloat16m4x2_t test_vundefined_bf16m4x2() { + return __riscv_vundefined_bf16m4x2(); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vfncvtbf16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vfncvtbf16.c new file mode 100644 index 000000000..4abf6b8b5 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vfncvtbf16.c @@ -0,0 +1,99 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4(vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2(vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1(vfloat32m2_t vs2, size_t vl) { + 
return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_m(vbool64_t vm, vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_m(vbool32_t vm, vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm(vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm(vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm(vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t +test_vfncvtbf16_f_f_w_bf16mf4_rm_m(vbool64_t vm, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_m(vbool32_t vm, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vm, 
vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vfwcvtbf16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vfwcvtbf16.c new file mode 100644 index 000000000..b9fd6c616 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vfwcvtbf16.c @@ -0,0 +1,54 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2(vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1(vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2(vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4(vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8(vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_m(vbool64_t vm, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_m(vbool32_t vm, 
vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_m(vbool16_t vm, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_m(vbool8_t vm, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_m(vbool4_t vm, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vfwmaccbf16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vfwmaccbf16.c new file mode 100644 index 000000000..beac2b32a --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vfwmaccbf16.c @@ -0,0 +1,230 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + 
+vfloat32m2_t test_vfwmaccbf16_vf_f32m2(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, 
vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, 
size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return 
__riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vget.c b/auto-generated/bfloat16/llvm-overloaded-tests/vget.c new file mode 100644 index 000000000..b29a6dcea --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vget.c @@ -0,0 +1,147 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16m1_t test_vget_v_bf16m2_bf16m1(vbfloat16m2_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m4_bf16m1(vbfloat16m4_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m8_bf16m1(vbfloat16m8_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m4_bf16m2(vbfloat16m4_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m8_bf16m2(vbfloat16m8_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m8_bf16m4(vbfloat16m8_t src, size_t index) { + return __riscv_vget_bf16m4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x2_bf16mf4(vbfloat16mf4x2_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t 
test_vget_v_bf16mf4x3_bf16mf4(vbfloat16mf4x3_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x4_bf16mf4(vbfloat16mf4x4_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x5_bf16mf4(vbfloat16mf4x5_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x6_bf16mf4(vbfloat16mf4x6_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x7_bf16mf4(vbfloat16mf4x7_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x8_bf16mf4(vbfloat16mf4x8_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x2_bf16mf2(vbfloat16mf2x2_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x3_bf16mf2(vbfloat16mf2x3_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x4_bf16mf2(vbfloat16mf2x4_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x5_bf16mf2(vbfloat16mf2x5_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x6_bf16mf2(vbfloat16mf2x6_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x7_bf16mf2(vbfloat16mf2x7_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x8_bf16mf2(vbfloat16mf2x8_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x2_bf16m1(vbfloat16m1x2_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x3_bf16m1(vbfloat16m1x3_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x4_bf16m1(vbfloat16m1x4_t src, size_t index) { 
+ return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x5_bf16m1(vbfloat16m1x5_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x6_bf16m1(vbfloat16m1x6_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x7_bf16m1(vbfloat16m1x7_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x8_bf16m1(vbfloat16m1x8_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x2_bf16m2(vbfloat16m2x2_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x3_bf16m2(vbfloat16m2x3_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x4_bf16m2(vbfloat16m2x4_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m4x2_bf16m4(vbfloat16m4x2_t src, size_t index) { + return __riscv_vget_bf16m4(src, 0); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vle16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vle16.c new file mode 100644 index 000000000..62d9a4461 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vle16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_m(vbool16_t vm, 
const __bf16 *rs1, + size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vle16ff.c b/auto-generated/bfloat16/llvm-overloaded-tests/vle16ff.c new file mode 100644 index 000000000..16c0e3a6a --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vle16ff.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + 
return __riscv_vle16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlmul_ext_v.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlmul_ext_v.c new file mode 100644 index 000000000..d8b6216c7 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlmul_ext_v.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf2_t test_vlmul_ext_v_bf16mf4_bf16mf2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf4_bf16m1(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf4_bf16m2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf4_bf16m4(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf4_bf16m8(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf2_bf16m1(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf2_bf16m2(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf2_bf16m4(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf2_bf16m8(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16m1_bf16m2(vbfloat16m1_t value) { + return __riscv_vlmul_ext_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m1_bf16m4(vbfloat16m1_t value) { + return 
__riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m1_bf16m8(vbfloat16m1_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m2_bf16m4(vbfloat16m2_t value) { + return __riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m2_bf16m8(vbfloat16m2_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m4_bf16m8(vbfloat16m4_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlmul_trunc_v.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlmul_trunc_v.c new file mode 100644 index 000000000..826c0938c --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlmul_trunc_v.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vlmul_trunc_v_bf16mf2_bf16mf4(vbfloat16mf2_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m1_bf16mf4(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m1_bf16mf2(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m2_bf16mf4(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m2_bf16mf2(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m2_bf16m1(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_bf16m1(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m4_bf16mf4(vbfloat16m4_t value) { + 
return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m4_bf16mf2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m4_bf16m1(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_trunc_v_bf16m4_bf16m2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_bf16m2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m8_bf16mf4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m8_bf16mf2(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m8_bf16m1(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_trunc_v_bf16m8_bf16m2(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_trunc_v_bf16m8_bf16m4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16m4(value); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxei16.c new file mode 100644 index 000000000..7a110728b --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vloxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m1_t 
test_vloxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg2ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg2ei16.c new file mode 100644 index 000000000..c90723a64 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg2ei16.c @@ -0,0 +1,61 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma 
-disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg3ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg3ei16.c new file mode 100644 index 000000000..932af6df8 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg3ei16.c @@ -0,0 
+1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg4ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg4ei16.c new file mode 100644 index 000000000..0248eceb9 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target 
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg5ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg5ei16.c new file mode 100644 index 000000000..0b6a5545a --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg6ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg6ei16.c new file mode 100644 index 000000000..15bab6e22 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg6ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t 
test_vloxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg7ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg7ei16.c new file mode 100644 index 000000000..35f3110eb --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t 
rs2, + size_t vl) { + return __riscv_vloxseg7ei16(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg8ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg8ei16.c new file mode 100644 index 000000000..da2ae96f3 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vloxseg8ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { 
+ return __riscv_vloxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlse16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlse16.c new file mode 100644 index 000000000..919bcd2f8 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlse16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg2e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg2e16.c new file mode 100644 index 000000000..03050648a --- /dev/null +++ 
b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg2e16.c @@ -0,0 +1,34 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg2e16ff.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg2e16ff.c new file mode 100644 index 000000000..c793a9c54 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg2e16ff.c @@ -0,0 +1,34 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, 
rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg3e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg3e16.c new file mode 100644 index 000000000..49020d2a7 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg3e16.c @@ -0,0 +1,29 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg3e16ff.c 
b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg3e16ff.c new file mode 100644 index 000000000..d70aad5d6 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg3e16ff.c @@ -0,0 +1,29 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg4e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg4e16.c new file mode 100644 index 000000000..63c9ea79a --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg4e16.c @@ -0,0 +1,29 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t 
test_vlseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg4e16ff.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg4e16ff.c new file mode 100644 index 000000000..0d64c4f33 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg4e16ff.c @@ -0,0 +1,29 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg5e16.c 
b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg5e16.c new file mode 100644 index 000000000..75127d1eb --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg5e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16(vm, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16(vm, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg5e16ff.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg5e16ff.c new file mode 100644 index 000000000..c5a2a0154 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg5e16ff.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg5e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg6e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg6e16.c new file mode 100644 index 000000000..e5fcc000a --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg6e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16(vm, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16(vm, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg6e16ff.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg6e16ff.c new file mode 100644 index 000000000..685309270 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg6e16ff.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + 
+vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg7e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg7e16.c new file mode 100644 index 000000000..c00fb5fee --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg7e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16(vm, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16(vm, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg7e16ff.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg7e16ff.c new file mode 100644 index 000000000..2cf1a2a78 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg7e16ff.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// 
RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg8e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg8e16.c new file mode 100644 index 000000000..eaf8cce70 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg8e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16(vm, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16(vm, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlseg8e16ff.c 
b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg8e16ff.c new file mode 100644 index 000000000..16fc33400 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlseg8e16ff.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg2e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg2e16.c new file mode 100644 index 000000000..42950ac4f --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg2e16.c @@ -0,0 +1,34 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t 
test_vlsseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg3e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg3e16.c new file mode 100644 index 000000000..9f016d5ed --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg3e16.c @@ -0,0 +1,29 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg4e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg4e16.c new file mode 100644 index 000000000..f712d7d7b --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg4e16.c @@ -0,0 +1,29 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg5e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg5e16.c new file mode 100644 index 000000000..0add09a89 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg5e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + 
+vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg6e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg6e16.c new file mode 100644 index 000000000..1b0d9eadd --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg6e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg7e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg7e16.c new file mode 100644 index 000000000..02ab02068 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg7e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg8e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg8e16.c new file mode 100644 index 000000000..1b8457c0d --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vlsseg8e16.c @@ -0,0 +1,24 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/llvm-overloaded-tests/vluxei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vluxei16.c new file mode 100644 index 000000000..96a8514fa --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vluxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_m(vbool8_t 
vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg2ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg2ei16.c new file mode 100644 index 000000000..e0a2958f6 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg2ei16.c @@ -0,0 +1,61 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, 
vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg3ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg3ei16.c new file mode 100644 index 000000000..9a4f56698 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg3ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf4x3_t 
test_vluxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg4ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg4ei16.c new file mode 100644 index 000000000..b2dbece41 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, 
+ const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg5ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg5ei16.c new file mode 100644 index 000000000..f2bff6a59 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, 
+ vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg6ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg6ei16.c new file mode 100644 index 000000000..bfb5959fe --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg6ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg7ei16.c 
b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg7ei16.c new file mode 100644 index 000000000..2ba7386ca --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg8ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg8ei16.c new file mode 100644 index 000000000..4b0e1bb01 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vluxseg8ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature 
+experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vreinterpret.c b/auto-generated/bfloat16/llvm-overloaded-tests/vreinterpret.c new file mode 100644 index 000000000..83d0af7de --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vreinterpret.c @@ -0,0 +1,105 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vreinterpret_v_i16mf4_bf16mf4(vint16mf4_t src) { + return 
__riscv_vreinterpret_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_i16mf2_bf16mf2(vint16mf2_t src) { + return __riscv_vreinterpret_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_i16m1_bf16m1(vint16m1_t src) { + return __riscv_vreinterpret_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_i16m2_bf16m2(vint16m2_t src) { + return __riscv_vreinterpret_bf16m2(src); +} + +vbfloat16m4_t test_vreinterpret_v_i16m4_bf16m4(vint16m4_t src) { + return __riscv_vreinterpret_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_i16m8_bf16m8(vint16m8_t src) { + return __riscv_vreinterpret_bf16m8(src); +} + +vbfloat16mf4_t test_vreinterpret_v_u16mf4_bf16mf4(vuint16mf4_t src) { + return __riscv_vreinterpret_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_u16mf2_bf16mf2(vuint16mf2_t src) { + return __riscv_vreinterpret_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_u16m1_bf16m1(vuint16m1_t src) { + return __riscv_vreinterpret_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_u16m2_bf16m2(vuint16m2_t src) { + return __riscv_vreinterpret_bf16m2(src); +} + +vbfloat16m4_t test_vreinterpret_v_u16m4_bf16m4(vuint16m4_t src) { + return __riscv_vreinterpret_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_u16m8_bf16m8(vuint16m8_t src) { + return __riscv_vreinterpret_bf16m8(src); +} + +vint16mf4_t test_vreinterpret_v_bf16mf4_i16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_i16mf4(src); +} + +vint16mf2_t test_vreinterpret_v_bf16mf2_i16mf2(vbfloat16mf2_t src) { + return __riscv_vreinterpret_i16mf2(src); +} + +vint16m1_t test_vreinterpret_v_bf16m1_i16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_i16m1(src); +} + +vint16m2_t test_vreinterpret_v_bf16m2_i16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_i16m2(src); +} + +vint16m4_t test_vreinterpret_v_bf16m4_i16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_i16m4(src); +} + +vint16m8_t test_vreinterpret_v_bf16m8_i16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_i16m8(src); 
+} + +vuint16mf4_t test_vreinterpret_v_bf16mf4_u16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_u16mf4(src); +} + +vuint16mf2_t test_vreinterpret_v_bf16mf2_u16mf2(vbfloat16mf2_t src) { + return __riscv_vreinterpret_u16mf2(src); +} + +vuint16m1_t test_vreinterpret_v_bf16m1_u16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_u16m1(src); +} + +vuint16m2_t test_vreinterpret_v_bf16m2_u16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_u16m2(src); +} + +vuint16m4_t test_vreinterpret_v_bf16m4_u16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_u16m4(src); +} + +vuint16m8_t test_vreinterpret_v_bf16m8_u16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_u16m8(src); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vse16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vse16.c new file mode 100644 index 000000000..a87045603 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vse16.c @@ -0,0 +1,63 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vse16_v_bf16mf4(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf2(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m1(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m2(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m4(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m8(__bf16 *rs1, vbfloat16m8_t vs3, size_t 
vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vset.c b/auto-generated/bfloat16/llvm-overloaded-tests/vset.c new file mode 100644 index 000000000..849480e67 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vset.c @@ -0,0 +1,178 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16m2_t test_vset_v_bf16m1_bf16m2(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m1_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m2_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m8_t 
test_vset_v_bf16m1_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m2_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m4_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x2_t test_vset_v_bf16mf4_bf16mf4x2(vbfloat16mf4x2_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x3_t test_vset_v_bf16mf4_bf16mf4x3(vbfloat16mf4x3_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x4_t test_vset_v_bf16mf4_bf16mf4x4(vbfloat16mf4x4_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x5_t test_vset_v_bf16mf4_bf16mf4x5(vbfloat16mf4x5_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x6_t test_vset_v_bf16mf4_bf16mf4x6(vbfloat16mf4x6_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x7_t test_vset_v_bf16mf4_bf16mf4x7(vbfloat16mf4x7_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x8_t test_vset_v_bf16mf4_bf16mf4x8(vbfloat16mf4x8_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x2_t test_vset_v_bf16mf2_bf16mf2x2(vbfloat16mf2x2_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x3_t test_vset_v_bf16mf2_bf16mf2x3(vbfloat16mf2x3_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x4_t test_vset_v_bf16mf2_bf16mf2x4(vbfloat16mf2x4_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + 
+vbfloat16mf2x5_t test_vset_v_bf16mf2_bf16mf2x5(vbfloat16mf2x5_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x6_t test_vset_v_bf16mf2_bf16mf2x6(vbfloat16mf2x6_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x7_t test_vset_v_bf16mf2_bf16mf2x7(vbfloat16mf2x7_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x8_t test_vset_v_bf16mf2_bf16mf2x8(vbfloat16mf2x8_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x2_t test_vset_v_bf16m1_bf16m1x2(vbfloat16m1x2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x3_t test_vset_v_bf16m1_bf16m1x3(vbfloat16m1x3_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x4_t test_vset_v_bf16m1_bf16m1x4(vbfloat16m1x4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x5_t test_vset_v_bf16m1_bf16m1x5(vbfloat16m1x5_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x6_t test_vset_v_bf16m1_bf16m1x6(vbfloat16m1x6_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x7_t test_vset_v_bf16m1_bf16m1x7(vbfloat16m1x7_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x8_t test_vset_v_bf16m1_bf16m1x8(vbfloat16m1x8_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m2x2_t test_vset_v_bf16m2_bf16m2x2(vbfloat16m2x2_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m2x3_t test_vset_v_bf16m2_bf16m2x3(vbfloat16m2x3_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m2x4_t 
test_vset_v_bf16m2_bf16m2x4(vbfloat16m2x4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m4x2_t test_vset_v_bf16m4_bf16m4x2(vbfloat16m4x2_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset(dest, 0, value); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxei16.c new file mode 100644 index 000000000..007b74642 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void 
test_vsoxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg2ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg2ei16.c new file mode 100644 index 000000000..9a27bb5d2 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg2ei16.c @@ -0,0 +1,61 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void 
test_vsoxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg3ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg3ei16.c new file mode 100644 index 000000000..19cd07be9 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg3ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return 
__riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg4ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg4ei16.c new file mode 100644 index 000000000..e21805337 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t 
vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg5ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg5ei16.c new file mode 100644 index 000000000..fa96e304d --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg5ei16_v_bf16mf4x5(__bf16 *rs1, 
vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg6ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg6ei16.c new file mode 100644 index 000000000..3572e5116 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg6ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(rs1, vs2, vs3, vl); +} + +void 
test_vsoxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg7ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg7ei16.c new file mode 100644 index 000000000..7b99b6239 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16(vm, 
rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg8ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg8ei16.c new file mode 100644 index 000000000..989430e15 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsoxseg8ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsoxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t 
vs3, size_t vl) { + return __riscv_vsoxseg8ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsse16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsse16.c new file mode 100644 index 000000000..98f228e23 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsse16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsse16_v_bf16mf4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1_t vs3, size_t vl) { + 
return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsseg2e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg2e16.c new file mode 100644 index 000000000..581644b69 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg2e16.c @@ -0,0 +1,54 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg2e16_v_bf16mf4x2(__bf16 *rs1, vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf2x2(__bf16 *rs1, vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m1x2(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2(__bf16 *rs1, vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void 
test_vsseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsseg3e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg3e16.c new file mode 100644 index 000000000..d68d5d475 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg3e16.c @@ -0,0 +1,45 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg3e16_v_bf16mf4x3(__bf16 *rs1, vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3(__bf16 *rs1, vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3(__bf16 *rs1, vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x3_t 
vs3, size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsseg4e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg4e16.c new file mode 100644 index 000000000..edc7e396e --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg4e16.c @@ -0,0 +1,45 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg4e16_v_bf16mf4x4(__bf16 *rs1, vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4(__bf16 *rs1, vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m2x4(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} + +void 
test_vsseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsseg5e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg5e16.c new file mode 100644 index 000000000..7f59d47b4 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg5e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg5e16_v_bf16mf4x5(__bf16 *rs1, vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5(__bf16 *rs1, vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16m1x5(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(vm, rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(vm, rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vsseg5e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsseg6e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg6e16.c new file mode 100644 index 000000000..5388501b2 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg6e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg6e16_v_bf16mf4x6(__bf16 *rs1, vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6(__bf16 *rs1, vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vsseg6e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsseg7e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg7e16.c new file mode 100644 index 000000000..1d67708e0 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg7e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg7e16_v_bf16mf4x7(__bf16 *rs1, vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7(__bf16 
*rs1, vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vsseg7e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsseg8e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg8e16.c new file mode 100644 index 000000000..2ca7b5488 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsseg8e16.c @@ -0,0 +1,36 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsseg8e16_v_bf16mf4x8(__bf16 *rs1, vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8(__bf16 *rs1, vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(vm, rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(vm, 
rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vsseg8e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vssseg2e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg2e16.c new file mode 100644 index 000000000..a923b682f --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg2e16.c @@ -0,0 +1,59 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg2e16_v_bf16mf4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 
*rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vssseg3e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg3e16.c new file mode 100644 index 000000000..4ceac5fc2 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg3e16.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg3e16_v_bf16mf4x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + 
vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vssseg4e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg4e16.c new file mode 100644 index 000000000..46f7ad43f --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg4e16.c @@ -0,0 +1,49 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg4e16_v_bf16mf4x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + 
return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vssseg5e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg5e16.c new file mode 100644 index 000000000..8d2c14637 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg5e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg5e16_v_bf16mf4x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vssseg5e16(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl) { + return 
__riscv_vssseg5e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vssseg6e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg6e16.c new file mode 100644 index 000000000..d73f4b713 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg6e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg6e16_v_bf16mf4x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf2x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vssseg6e16(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vssseg7e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg7e16.c new file mode 100644 index 000000000..d70ca8db5 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg7e16.c @@ -0,0 +1,39 @@ +// REQUIRES: 
riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg7e16_v_bf16mf4x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16m1x7(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vssseg7e16(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vssseg8e16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg8e16.c new file mode 100644 index 000000000..98c949644 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vssseg8e16.c @@ -0,0 +1,39 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +void test_vssseg8e16_v_bf16mf4x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vssseg8e16(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxei16.c new file mode 100644 index 000000000..99298c79d --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxei16.c @@ -0,0 +1,69 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void 
test_vsuxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg2ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg2ei16.c new file mode 100644 index 000000000..6ef154030 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg2ei16.c @@ -0,0 +1,61 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ 
+// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg3ei16.c 
b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg3ei16.c new file mode 100644 index 000000000..a1c1c3435 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg3ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} diff --git 
a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg4ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg4ei16.c new file mode 100644 index 000000000..c5f3c5cb4 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg4ei16.c @@ -0,0 +1,51 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, 
vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg5ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg5ei16.c new file mode 100644 index 000000000..3c7d47dd3 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg5ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg6ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg6ei16.c new file mode 100644 index 000000000..942e4ff4f --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg6ei16.c @@ -0,0 +1,41 @@ +// 
REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg7ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg7ei16.c new file mode 100644 index 000000000..68ec0a391 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg7ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - 
| opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg8ei16.c b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg8ei16.c new file mode 100644 index 000000000..838e6b721 --- /dev/null +++ b/auto-generated/bfloat16/llvm-overloaded-tests/vsuxseg8ei16.c @@ -0,0 +1,41 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +void test_vsuxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8(__bf16 *rs1, 
vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vfncvtbf16.c b/auto-generated/bfloat16/overloaded-api-testing/vfncvtbf16.c new file mode 100644 index 000000000..d402fd187 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vfncvtbf16.c @@ -0,0 +1,92 @@ +#include +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4(vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2(vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1(vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_m(vbool64_t vm, vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_m(vbool32_t vm, vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + 
+vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm(vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm(vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm(vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm(vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm(vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t +test_vfncvtbf16_f_f_w_bf16mf4_rm_m(vbool64_t vm, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_m(vbool32_t vm, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_m(vbool16_t vm, vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_m(vbool8_t vm, vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_m(vbool4_t vm, vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f(vm, vs2, __RISCV_FRM_RNE, vl); +} diff --git 
a/auto-generated/bfloat16/overloaded-api-testing/vfwcvtbf16.c b/auto-generated/bfloat16/overloaded-api-testing/vfwcvtbf16.c new file mode 100644 index 000000000..9e0306536 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vfwcvtbf16.c @@ -0,0 +1,47 @@ +#include +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2(vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1(vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2(vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4(vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8(vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f(vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_m(vbool64_t vm, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_m(vbool32_t vm, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_m(vbool16_t vm, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_m(vbool8_t vm, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_m(vbool4_t vm, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f(vm, vs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vfwmaccbf16.c b/auto-generated/bfloat16/overloaded-api-testing/vfwmaccbf16.c new file mode 100644 index 000000000..19c317e42 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vfwmaccbf16.c @@ -0,0 +1,223 @@ +#include +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t 
vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + 
size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + 
return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_m(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_m(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t 
test_vfwmaccbf16_vv_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_m(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_m(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_m(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vget.c b/auto-generated/bfloat16/overloaded-api-testing/vget.c new file mode 100644 index 000000000..f249b3faf --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vget.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16m1_t test_vget_v_bf16m2_bf16m1(vbfloat16m2_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m4_bf16m1(vbfloat16m4_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m8_bf16m1(vbfloat16m8_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m4_bf16m2(vbfloat16m4_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m2_t 
test_vget_v_bf16m8_bf16m2(vbfloat16m8_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m8_bf16m4(vbfloat16m8_t src, size_t index) { + return __riscv_vget_bf16m4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x2_bf16mf4(vbfloat16mf4x2_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x3_bf16mf4(vbfloat16mf4x3_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x4_bf16mf4(vbfloat16mf4x4_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x5_bf16mf4(vbfloat16mf4x5_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x6_bf16mf4(vbfloat16mf4x6_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x7_bf16mf4(vbfloat16mf4x7_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf4_t test_vget_v_bf16mf4x8_bf16mf4(vbfloat16mf4x8_t src, + size_t index) { + return __riscv_vget_bf16mf4(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x2_bf16mf2(vbfloat16mf2x2_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x3_bf16mf2(vbfloat16mf2x3_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x4_bf16mf2(vbfloat16mf2x4_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x5_bf16mf2(vbfloat16mf2x5_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x6_bf16mf2(vbfloat16mf2x6_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x7_bf16mf2(vbfloat16mf2x7_t src, + size_t index) { + return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16mf2_t test_vget_v_bf16mf2x8_bf16mf2(vbfloat16mf2x8_t src, + size_t index) { 
+ return __riscv_vget_bf16mf2(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x2_bf16m1(vbfloat16m1x2_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x3_bf16m1(vbfloat16m1x3_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x4_bf16m1(vbfloat16m1x4_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x5_bf16m1(vbfloat16m1x5_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x6_bf16m1(vbfloat16m1x6_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x7_bf16m1(vbfloat16m1x7_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m1_t test_vget_v_bf16m1x8_bf16m1(vbfloat16m1x8_t src, size_t index) { + return __riscv_vget_bf16m1(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x2_bf16m2(vbfloat16m2x2_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x3_bf16m2(vbfloat16m2x3_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m2_t test_vget_v_bf16m2x4_bf16m2(vbfloat16m2x4_t src, size_t index) { + return __riscv_vget_bf16m2(src, 0); +} + +vbfloat16m4_t test_vget_v_bf16m4x2_bf16m4(vbfloat16m4x2_t src, size_t index) { + return __riscv_vget_bf16m4(src, 0); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vle16.c b/auto-generated/bfloat16/overloaded-api-testing/vle16.c new file mode 100644 index 000000000..2e0ef5c7c --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vle16.c @@ -0,0 +1,29 @@ +#include +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_m(vbool16_t vm, const 
__bf16 *rs1, + size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, size_t vl) { + return __riscv_vle16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vle16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vle16ff.c new file mode 100644 index 000000000..34da33989 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vle16ff.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlmul_ext_v.c b/auto-generated/bfloat16/overloaded-api-testing/vlmul_ext_v.c new file mode 100644 index 000000000..b26e1401c --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlmul_ext_v.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf2_t 
test_vlmul_ext_v_bf16mf4_bf16mf2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf4_bf16m1(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf4_bf16m2(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf4_bf16m4(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf4_bf16m8(vbfloat16mf4_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m1_t test_vlmul_ext_v_bf16mf2_bf16m1(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16mf2_bf16m2(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16mf2_bf16m4(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16mf2_bf16m8(vbfloat16mf2_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m2_t test_vlmul_ext_v_bf16m1_bf16m2(vbfloat16m1_t value) { + return __riscv_vlmul_ext_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m1_bf16m4(vbfloat16m1_t value) { + return __riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m1_bf16m8(vbfloat16m1_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m4_t test_vlmul_ext_v_bf16m2_bf16m4(vbfloat16m2_t value) { + return __riscv_vlmul_ext_bf16m4(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m2_bf16m8(vbfloat16m2_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} + +vbfloat16m8_t test_vlmul_ext_v_bf16m4_bf16m8(vbfloat16m4_t value) { + return __riscv_vlmul_ext_bf16m8(value); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlmul_trunc_v.c b/auto-generated/bfloat16/overloaded-api-testing/vlmul_trunc_v.c new file mode 100644 index 000000000..96b46c1e8 --- /dev/null +++ 
b/auto-generated/bfloat16/overloaded-api-testing/vlmul_trunc_v.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vlmul_trunc_v_bf16mf2_bf16mf4(vbfloat16mf2_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m1_bf16mf4(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m1_bf16mf2(vbfloat16m1_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m2_bf16mf4(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m2_bf16mf2(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m2_bf16m1(vbfloat16m2_t value) { + return __riscv_vlmul_trunc_bf16m1(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m4_bf16mf4(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m4_bf16mf2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m4_bf16m1(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_trunc_v_bf16m4_bf16m2(vbfloat16m4_t value) { + return __riscv_vlmul_trunc_bf16m2(value); +} + +vbfloat16mf4_t test_vlmul_trunc_v_bf16m8_bf16mf4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16mf4(value); +} + +vbfloat16mf2_t test_vlmul_trunc_v_bf16m8_bf16mf2(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16mf2(value); +} + +vbfloat16m1_t test_vlmul_trunc_v_bf16m8_bf16m1(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16m1(value); +} + +vbfloat16m2_t test_vlmul_trunc_v_bf16m8_bf16m2(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16m2(value); +} + +vbfloat16m4_t test_vlmul_trunc_v_bf16m8_bf16m4(vbfloat16m8_t value) { + return __riscv_vlmul_trunc_bf16m4(value); +} diff --git 
a/auto-generated/bfloat16/overloaded-api-testing/vloxei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxei16.c new file mode 100644 index 000000000..a32bd147c --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxei16.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vloxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return 
__riscv_vloxei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vloxseg2ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxseg2ei16.c new file mode 100644 index 000000000..06999c6b0 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxseg2ei16.c @@ -0,0 +1,54 @@ +#include +#include + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/overloaded-api-testing/vloxseg3ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxseg3ei16.c new file mode 100644 index 000000000..1534ddfde --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vloxseg4ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxseg4ei16.c new file mode 100644 index 000000000..25543e43b --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return 
__riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16(rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vloxseg5ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxseg5ei16.c new file mode 100644 index 000000000..cb842f8d3 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, 
+ vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vloxseg6ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxseg6ei16.c new file mode 100644 index 000000000..866ca7f8c --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vloxseg7ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxseg7ei16.c new file mode 100644 index 000000000..788934129 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + 
+vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16(rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vloxseg8ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vloxseg8ei16.c new file mode 100644 index 000000000..001837f44 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vloxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) 
{ + return __riscv_vloxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlse16.c b/auto-generated/bfloat16/overloaded-api-testing/vlse16.c new file mode 100644 index 000000000..120ce69e7 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlse16.c @@ -0,0 +1,32 @@ +#include +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg2e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg2e16.c new file mode 100644 index 000000000..4d3292d0a --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg2e16.c @@ -0,0 +1,27 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + 
+vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg2e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg2e16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg2e16ff.c new file mode 100644 index 000000000..53ba3ba68 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg2e16ff.c @@ -0,0 +1,27 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg3e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg3e16.c new file mode 100644 index 000000000..a3cf2b4de --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg3e16.c @@ -0,0 +1,22 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} + 
+vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg3e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg3e16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg3e16ff.c new file mode 100644 index 000000000..c708c12bf --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg3e16ff.c @@ -0,0 +1,22 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg4e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg4e16.c new file mode 100644 index 000000000..4d0994c2a --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg4e16.c @@ -0,0 +1,22 @@ +#include +#include + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} + +vbfloat16m1x4_t 
test_vlseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg4e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg4e16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg4e16ff.c new file mode 100644 index 000000000..bdfb7c1ed --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg4e16ff.c @@ -0,0 +1,22 @@ +#include +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg5e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg5e16.c new file mode 100644 index 000000000..0a8e634c5 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg5e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16(vm, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16(vm, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg5e16(vm, rs1, vl); +} diff --git 
a/auto-generated/bfloat16/overloaded-api-testing/vlseg5e16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg5e16ff.c new file mode 100644 index 000000000..6e8e9d1f0 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg5e16ff.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg6e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg6e16.c new file mode 100644 index 000000000..9365aeb8d --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg6e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16(vm, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16(vm, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg6e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg6e16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg6e16ff.c new file mode 100644 index 000000000..8376dd159 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg6e16ff.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff(vm, rs1, new_vl, vl); +} + 
+vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg7e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg7e16.c new file mode 100644 index 000000000..01c7e48d8 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg7e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16(vm, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16(vm, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg7e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg7e16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg7e16ff.c new file mode 100644 index 000000000..8db5ec0e2 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg7e16ff.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg8e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg8e16.c new 
file mode 100644 index 000000000..cc5804338 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg8e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16(vm, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16(vm, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t vl) { + return __riscv_vlseg8e16(vm, rs1, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlseg8e16ff.c b/auto-generated/bfloat16/overloaded-api-testing/vlseg8e16ff.c new file mode 100644 index 000000000..4011e172e --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlseg8e16ff.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff(vm, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff(vm, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlsseg2e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlsseg2e16.c new file mode 100644 index 000000000..53c30e9e0 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlsseg2e16.c @@ -0,0 +1,27 @@ +#include +#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, 
rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlsseg3e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlsseg3e16.c new file mode 100644 index 000000000..b3f3213fa --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlsseg3e16.c @@ -0,0 +1,22 @@ +#include +#include + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlsseg4e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlsseg4e16.c new file mode 100644 index 000000000..b24623f0a --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlsseg4e16.c @@ -0,0 +1,22 @@ +#include +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + 
ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlsseg5e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlsseg5e16.c new file mode 100644 index 000000000..98e718b17 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlsseg5e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlsseg6e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlsseg6e16.c new file mode 100644 index 000000000..9b6a0f74a --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlsseg6e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/overloaded-api-testing/vlsseg7e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlsseg7e16.c new file mode 100644 index 000000000..4c25ff34d --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlsseg7e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vlsseg8e16.c b/auto-generated/bfloat16/overloaded-api-testing/vlsseg8e16.c new file mode 100644 index 000000000..dde6175ca --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vlsseg8e16.c @@ -0,0 +1,17 @@ +#include +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vluxei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxei16.c new file mode 100644 index 000000000..934f2d147 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxei16.c @@ -0,0 +1,62 @@ +#include +#include + +vbfloat16mf4_t test_vluxei16_v_bf16mf4(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + 
+vbfloat16mf2_t test_vluxei16_v_bf16mf2(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8(const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16(rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vluxseg2ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxseg2ei16.c new file mode 100644 index 000000000..73f98c757 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxseg2ei16.c @@ -0,0 +1,54 @@ +#include +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2(const __bf16 *rs1, + vuint16mf4_t 
rs2, size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16(rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vluxseg3ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxseg3ei16.c new file mode 100644 index 000000000..a63c93a80 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf2x3_t 
test_vluxseg3ei16_v_bf16mf2x3(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16(rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vluxseg4ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxseg4ei16.c new file mode 100644 index 000000000..77ad1b7a3 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16(rs1, rs2, vl); 
+} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vluxseg5ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxseg5ei16.c new file mode 100644 index 000000000..e125c0b5a --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16(rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16(vm, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/overloaded-api-testing/vluxseg6ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxseg6ei16.c new file mode 100644 index 000000000..570414da9 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16(rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vluxseg7ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxseg7ei16.c new file mode 100644 index 000000000..ecf6bb4ee --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16(rs1, rs2, 
vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vluxseg8ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vluxseg8ei16.c new file mode 100644 index 000000000..bf428cc23 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vluxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8(const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8(const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16(rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(vm, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16(vm, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vreinterpret.c b/auto-generated/bfloat16/overloaded-api-testing/vreinterpret.c new file mode 100644 index 000000000..457fecd65 --- /dev/null +++ 
b/auto-generated/bfloat16/overloaded-api-testing/vreinterpret.c @@ -0,0 +1,98 @@ +#include +#include + +vbfloat16mf4_t test_vreinterpret_v_i16mf4_bf16mf4(vint16mf4_t src) { + return __riscv_vreinterpret_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_i16mf2_bf16mf2(vint16mf2_t src) { + return __riscv_vreinterpret_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_i16m1_bf16m1(vint16m1_t src) { + return __riscv_vreinterpret_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_i16m2_bf16m2(vint16m2_t src) { + return __riscv_vreinterpret_bf16m2(src); +} + +vbfloat16m4_t test_vreinterpret_v_i16m4_bf16m4(vint16m4_t src) { + return __riscv_vreinterpret_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_i16m8_bf16m8(vint16m8_t src) { + return __riscv_vreinterpret_bf16m8(src); +} + +vbfloat16mf4_t test_vreinterpret_v_u16mf4_bf16mf4(vuint16mf4_t src) { + return __riscv_vreinterpret_bf16mf4(src); +} + +vbfloat16mf2_t test_vreinterpret_v_u16mf2_bf16mf2(vuint16mf2_t src) { + return __riscv_vreinterpret_bf16mf2(src); +} + +vbfloat16m1_t test_vreinterpret_v_u16m1_bf16m1(vuint16m1_t src) { + return __riscv_vreinterpret_bf16m1(src); +} + +vbfloat16m2_t test_vreinterpret_v_u16m2_bf16m2(vuint16m2_t src) { + return __riscv_vreinterpret_bf16m2(src); +} + +vbfloat16m4_t test_vreinterpret_v_u16m4_bf16m4(vuint16m4_t src) { + return __riscv_vreinterpret_bf16m4(src); +} + +vbfloat16m8_t test_vreinterpret_v_u16m8_bf16m8(vuint16m8_t src) { + return __riscv_vreinterpret_bf16m8(src); +} + +vint16mf4_t test_vreinterpret_v_bf16mf4_i16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_i16mf4(src); +} + +vint16mf2_t test_vreinterpret_v_bf16mf2_i16mf2(vbfloat16mf2_t src) { + return __riscv_vreinterpret_i16mf2(src); +} + +vint16m1_t test_vreinterpret_v_bf16m1_i16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_i16m1(src); +} + +vint16m2_t test_vreinterpret_v_bf16m2_i16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_i16m2(src); +} + +vint16m4_t 
test_vreinterpret_v_bf16m4_i16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_i16m4(src); +} + +vint16m8_t test_vreinterpret_v_bf16m8_i16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_i16m8(src); +} + +vuint16mf4_t test_vreinterpret_v_bf16mf4_u16mf4(vbfloat16mf4_t src) { + return __riscv_vreinterpret_u16mf4(src); +} + +vuint16mf2_t test_vreinterpret_v_bf16mf2_u16mf2(vbfloat16mf2_t src) { + return __riscv_vreinterpret_u16mf2(src); +} + +vuint16m1_t test_vreinterpret_v_bf16m1_u16m1(vbfloat16m1_t src) { + return __riscv_vreinterpret_u16m1(src); +} + +vuint16m2_t test_vreinterpret_v_bf16m2_u16m2(vbfloat16m2_t src) { + return __riscv_vreinterpret_u16m2(src); +} + +vuint16m4_t test_vreinterpret_v_bf16m4_u16m4(vbfloat16m4_t src) { + return __riscv_vreinterpret_u16m4(src); +} + +vuint16m8_t test_vreinterpret_v_bf16m8_u16m8(vbfloat16m8_t src) { + return __riscv_vreinterpret_u16m8(src); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vse16.c b/auto-generated/bfloat16/overloaded-api-testing/vse16.c new file mode 100644 index 000000000..923f74fd4 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vse16.c @@ -0,0 +1,56 @@ +#include +#include + +void test_vse16_v_bf16mf4(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf2(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m1(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m2(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m4(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16m8(__bf16 *rs1, vbfloat16m8_t vs3, size_t vl) { + return __riscv_vse16(rs1, vs3, vl); +} + +void test_vse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vse16(vm, 
rs1, vs3, vl); +} + +void test_vse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} + +void test_vse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vse16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vset.c b/auto-generated/bfloat16/overloaded-api-testing/vset.c new file mode 100644 index 000000000..93fe47cd1 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vset.c @@ -0,0 +1,171 @@ +#include +#include + +vbfloat16m2_t test_vset_v_bf16m1_bf16m2(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m1_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m4_t test_vset_v_bf16m2_bf16m4(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m1_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m2_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m8_t test_vset_v_bf16m4_bf16m8(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x2_t test_vset_v_bf16mf4_bf16mf4x2(vbfloat16mf4x2_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); 
+} + +vbfloat16mf4x3_t test_vset_v_bf16mf4_bf16mf4x3(vbfloat16mf4x3_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x4_t test_vset_v_bf16mf4_bf16mf4x4(vbfloat16mf4x4_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x5_t test_vset_v_bf16mf4_bf16mf4x5(vbfloat16mf4x5_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x6_t test_vset_v_bf16mf4_bf16mf4x6(vbfloat16mf4x6_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x7_t test_vset_v_bf16mf4_bf16mf4x7(vbfloat16mf4x7_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf4x8_t test_vset_v_bf16mf4_bf16mf4x8(vbfloat16mf4x8_t dest, + size_t index, + vbfloat16mf4_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x2_t test_vset_v_bf16mf2_bf16mf2x2(vbfloat16mf2x2_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x3_t test_vset_v_bf16mf2_bf16mf2x3(vbfloat16mf2x3_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x4_t test_vset_v_bf16mf2_bf16mf2x4(vbfloat16mf2x4_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x5_t test_vset_v_bf16mf2_bf16mf2x5(vbfloat16mf2x5_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x6_t test_vset_v_bf16mf2_bf16mf2x6(vbfloat16mf2x6_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x7_t test_vset_v_bf16mf2_bf16mf2x7(vbfloat16mf2x7_t dest, + size_t index, + vbfloat16mf2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16mf2x8_t test_vset_v_bf16mf2_bf16mf2x8(vbfloat16mf2x8_t dest, + size_t index, + vbfloat16mf2_t value) 
{ + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x2_t test_vset_v_bf16m1_bf16m1x2(vbfloat16m1x2_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x3_t test_vset_v_bf16m1_bf16m1x3(vbfloat16m1x3_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x4_t test_vset_v_bf16m1_bf16m1x4(vbfloat16m1x4_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x5_t test_vset_v_bf16m1_bf16m1x5(vbfloat16m1x5_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x6_t test_vset_v_bf16m1_bf16m1x6(vbfloat16m1x6_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x7_t test_vset_v_bf16m1_bf16m1x7(vbfloat16m1x7_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m1x8_t test_vset_v_bf16m1_bf16m1x8(vbfloat16m1x8_t dest, size_t index, + vbfloat16m1_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m2x2_t test_vset_v_bf16m2_bf16m2x2(vbfloat16m2x2_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m2x3_t test_vset_v_bf16m2_bf16m2x3(vbfloat16m2x3_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m2x4_t test_vset_v_bf16m2_bf16m2x4(vbfloat16m2x4_t dest, size_t index, + vbfloat16m2_t value) { + return __riscv_vset(dest, 0, value); +} + +vbfloat16m4x2_t test_vset_v_bf16m4_bf16m4x2(vbfloat16m4x2_t dest, size_t index, + vbfloat16m4_t value) { + return __riscv_vset(dest, 0, value); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxei16.c new file mode 100644 index 000000000..c642d0d2c --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsoxei16.c @@ -0,0 +1,62 @@ +#include +#include + +void 
test_vsoxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsoxei16(rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsoxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsoxei16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxseg2ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg2ei16.c new file 
mode 100644 index 000000000..04116b6bc --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg2ei16.c @@ -0,0 +1,54 @@ +#include +#include + +void test_vsoxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxseg3ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg3ei16.c new file mode 100644 index 
000000000..5b573578f --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsoxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxseg4ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg4ei16.c new file mode 100644 index 000000000..f20ddb725 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsoxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4(__bf16 *rs1, 
vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxseg5ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg5ei16.c new file mode 100644 index 000000000..b3bf5bf37 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, 
vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxseg6ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg6ei16.c new file mode 100644 index 000000000..271ae1083 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxseg7ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg7ei16.c new file mode 100644 index 000000000..730c15d38 --- /dev/null +++ 
b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsoxseg8ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg8ei16.c new file mode 100644 index 000000000..51bb463d6 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsoxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsoxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t 
vs3, + size_t vl) { + return __riscv_vsoxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsoxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsse16.c b/auto-generated/bfloat16/overloaded-api-testing/vsse16.c new file mode 100644 index 000000000..e44dc2415 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsse16.c @@ -0,0 +1,62 @@ +#include +#include + +void test_vsse16_v_bf16mf4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsse16(rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return 
__riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} + +void test_vsse16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsse16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsseg2e16.c b/auto-generated/bfloat16/overloaded-api-testing/vsseg2e16.c new file mode 100644 index 000000000..ce09da7a2 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsseg2e16.c @@ -0,0 +1,47 @@ +#include +#include + +void test_vsseg2e16_v_bf16mf4x2(__bf16 *rs1, vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf2x2(__bf16 *rs1, vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m1x2(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2(__bf16 *rs1, vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x2_t vs3, + size_t vl) { + return 
__riscv_vsseg2e16(vm, rs1, vs3, vl); +} + +void test_vsseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vsseg2e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsseg3e16.c b/auto-generated/bfloat16/overloaded-api-testing/vsseg3e16.c new file mode 100644 index 000000000..066b28cf2 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsseg3e16.c @@ -0,0 +1,38 @@ +#include +#include + +void test_vsseg3e16_v_bf16mf4x3(__bf16 *rs1, vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3(__bf16 *rs1, vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3(__bf16 *rs1, vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} + +void test_vsseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vsseg3e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsseg4e16.c b/auto-generated/bfloat16/overloaded-api-testing/vsseg4e16.c new file mode 100644 index 000000000..c0ab986d3 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsseg4e16.c @@ -0,0 +1,38 @@ +#include +#include + +void test_vsseg4e16_v_bf16mf4x4(__bf16 *rs1, vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, 
vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4(__bf16 *rs1, vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m2x4(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} + +void test_vsseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vsseg4e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsseg5e16.c b/auto-generated/bfloat16/overloaded-api-testing/vsseg5e16.c new file mode 100644 index 000000000..2c04b9b7d --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsseg5e16.c @@ -0,0 +1,29 @@ +#include +#include + +void test_vsseg5e16_v_bf16mf4x5(__bf16 *rs1, vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5(__bf16 *rs1, vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16m1x5(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(vm, rs1, vs3, vl); +} + +void test_vsseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsseg5e16(vm, rs1, vs3, vl); +} + +void 
test_vsseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vsseg5e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsseg6e16.c b/auto-generated/bfloat16/overloaded-api-testing/vsseg6e16.c new file mode 100644 index 000000000..fe537164b --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsseg6e16.c @@ -0,0 +1,29 @@ +#include +#include + +void test_vsseg6e16_v_bf16mf4x6(__bf16 *rs1, vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6(__bf16 *rs1, vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsseg6e16(vm, rs1, vs3, vl); +} + +void test_vsseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vsseg6e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsseg7e16.c b/auto-generated/bfloat16/overloaded-api-testing/vsseg7e16.c new file mode 100644 index 000000000..36f79a388 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsseg7e16.c @@ -0,0 +1,29 @@ +#include +#include + +void test_vsseg7e16_v_bf16mf4x7(__bf16 *rs1, vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7(__bf16 *rs1, vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf4x7_m(vbool64_t vm, 
__bf16 *rs1, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsseg7e16(vm, rs1, vs3, vl); +} + +void test_vsseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vsseg7e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsseg8e16.c b/auto-generated/bfloat16/overloaded-api-testing/vsseg8e16.c new file mode 100644 index 000000000..c6a631a0e --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsseg8e16.c @@ -0,0 +1,29 @@ +#include +#include + +void test_vsseg8e16_v_bf16mf4x8(__bf16 *rs1, vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8(__bf16 *rs1, vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(vm, rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsseg8e16(vm, rs1, vs3, vl); +} + +void test_vsseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vsseg8e16(vm, rs1, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vssseg2e16.c b/auto-generated/bfloat16/overloaded-api-testing/vssseg2e16.c new file mode 100644 index 000000000..28d76f0ae --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vssseg2e16.c @@ -0,0 +1,52 @@ +#include +#include + +void test_vssseg2e16_v_bf16mf4x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void 
test_vssseg2e16_v_bf16mf2x2(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4x2_t vs3, + size_t vl) { + return __riscv_vssseg2e16(rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg2e16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vssseg2e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vssseg3e16.c b/auto-generated/bfloat16/overloaded-api-testing/vssseg3e16.c new file mode 100644 index 000000000..445145245 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vssseg3e16.c @@ -0,0 +1,42 @@ +#include +#include + +void test_vssseg3e16_v_bf16mf4x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl) { 
+ return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x3_t vs3, + size_t vl) { + return __riscv_vssseg3e16(rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg3e16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vssseg3e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vssseg4e16.c b/auto-generated/bfloat16/overloaded-api-testing/vssseg4e16.c new file mode 100644 index 000000000..98d2b433a --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vssseg4e16.c @@ -0,0 +1,42 @@ +#include +#include + +void test_vssseg4e16_v_bf16mf4x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x4_t vs3, + size_t vl) { + return __riscv_vssseg4e16(rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf4x4_m(vbool64_t vm, __bf16 
*rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg4e16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vssseg4e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vssseg5e16.c b/auto-generated/bfloat16/overloaded-api-testing/vssseg5e16.c new file mode 100644 index 000000000..d6f27bf5e --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vssseg5e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg5e16_v_bf16mf4x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x5_t vs3, + size_t vl) { + return __riscv_vssseg5e16(rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg5e16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vssseg5e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vssseg6e16.c 
b/auto-generated/bfloat16/overloaded-api-testing/vssseg6e16.c new file mode 100644 index 000000000..ad952f272 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vssseg6e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg6e16_v_bf16mf4x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf2x6(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x6_t vs3, + size_t vl) { + return __riscv_vssseg6e16(rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg6e16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vssseg6e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vssseg7e16.c b/auto-generated/bfloat16/overloaded-api-testing/vssseg7e16.c new file mode 100644 index 000000000..b84d2b9db --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vssseg7e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg7e16_v_bf16mf4x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16m1x7(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x7_t vs3, + size_t vl) { + return __riscv_vssseg7e16(rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf4x7_m(vbool64_t 
vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg7e16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vssseg7e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vssseg8e16.c b/auto-generated/bfloat16/overloaded-api-testing/vssseg8e16.c new file mode 100644 index 000000000..195be7c8e --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vssseg8e16.c @@ -0,0 +1,32 @@ +#include +#include + +void test_vssseg8e16_v_bf16mf4x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8(__bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x8_t vs3, + size_t vl) { + return __riscv_vssseg8e16(rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(vm, rs1, rs2, vs3, vl); +} + +void test_vssseg8e16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vssseg8e16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsuxei16.c new file mode 100644 index 000000000..4236a87c0 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxei16.c @@ -0,0 
+1,62 @@ +#include +#include + +void test_vsuxei16_v_bf16mf4(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m1(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsuxei16(rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} + +void test_vsuxei16_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsuxei16(vm, rs1, rs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxseg2ei16.c 
b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg2ei16.c new file mode 100644 index 000000000..df05ac74e --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg2ei16.c @@ -0,0 +1,54 @@ +#include +#include + +void test_vsuxseg2ei16_v_bf16mf4x2(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2(__bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg2ei16_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxseg3ei16.c 
b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg3ei16.c new file mode 100644 index 000000000..6ca09eb44 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg3ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsuxseg3ei16_v_bf16mf4x3(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg3ei16_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxseg4ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg4ei16.c new file mode 100644 index 000000000..15d0841c3 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg4ei16.c @@ -0,0 +1,44 @@ +#include +#include + +void test_vsuxseg4ei16_v_bf16mf4x4(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return 
__riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf2x4(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4(__bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg4ei16_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxseg5ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg5ei16.c new file mode 100644 index 000000000..7467e5cd1 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg5ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg5ei16_v_bf16mf4x5(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16m1x5(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(rs1, vs2, vs3, 
vl); +} + +void test_vsuxseg5ei16_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg5ei16_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxseg6ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg6ei16.c new file mode 100644 index 000000000..437c9778f --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg6ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg6ei16_v_bf16mf4x6(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg6ei16_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxseg7ei16.c 
b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg7ei16.c new file mode 100644 index 000000000..7e86d2539 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg7ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg7ei16_v_bf16mf4x7(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg7ei16_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded-api-testing/vsuxseg8ei16.c b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg8ei16.c new file mode 100644 index 000000000..eaaae3645 --- /dev/null +++ b/auto-generated/bfloat16/overloaded-api-testing/vsuxseg8ei16.c @@ -0,0 +1,34 @@ +#include +#include + +void test_vsuxseg8ei16_v_bf16mf4x8(__bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8(__bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16m1x8(__bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return 
__riscv_vsuxseg8ei16(rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, + vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei16(vm, rs1, vs2, vs3, vl); +} + +void test_vsuxseg8ei16_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei16(vm, rs1, vs2, vs3, vl); +} diff --git a/auto-generated/bfloat16/overloaded_intrinsic_funcs.adoc b/auto-generated/bfloat16/overloaded_intrinsic_funcs.adoc new file mode 100644 index 000000000..b5200a485 --- /dev/null +++ b/auto-generated/bfloat16/overloaded_intrinsic_funcs.adoc @@ -0,0 +1,1317 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[overloaded-bf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4_t __riscv_vle16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16(vbool4_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16(vbool2_t vm, const __bf16 *rs1, size_t vl); +---- + +[[overloaded-bf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics + +[,c] +---- +void __riscv_vse16(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m8_t vs3, size_t vl); +// masked functions +void 
__riscv_vse16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vse16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vse16(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, size_t vl); +void __riscv_vse16(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, size_t vl); +void __riscv_vse16(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, size_t vl); +void __riscv_vse16(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, size_t vl); +---- + +[[overloaded-vector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4_t __riscv_vlse16(vbool64_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16(vbool32_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16(vbool16_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16(vbool8_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16(vbool4_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16(vbool2_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +---- + +[[overloaded-vector-strided-store]] +==== Vector Strided Store Intrinsics + +[,c] +---- +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, size_t vl); +// masked functions +void __riscv_vsse16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsse16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsse16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t 
vs3, + size_t vl); +void __riscv_vsse16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsse16(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsse16(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl); +---- + +[[overloaded-vector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16(const __bf16 *rs1, vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16(const __bf16 *rs1, vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16(const __bf16 *rs1, vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16(const __bf16 *rs1, vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16(const __bf16 *rs1, vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16(const __bf16 *rs1, vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16(const __bf16 *rs1, vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16(const __bf16 *rs1, vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16(const __bf16 *rs1, vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vluxei16(const __bf16 *rs1, vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16(const __bf16 *rs1, vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16(const __bf16 *rs1, vuint16m8_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16(vbool16_t vm, const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16(vbool8_t vm, const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16(vbool4_t vm, const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16(vbool2_t vm, const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t 
__riscv_vluxei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16(vbool16_t vm, const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16(vbool8_t vm, const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16(vbool4_t vm, const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16(vbool2_t vm, const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +---- + +[[overloaded-vector-indexed-store]] +==== Vector Indexed Store Intrinsics + +[,c] +---- +void __riscv_vsoxei16(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl); +// masked functions +void __riscv_vsoxei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsoxei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsoxei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + 
vbfloat16m1_t vs3, size_t vl); +void __riscv_vsoxei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsoxei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsoxei16(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +void __riscv_vsuxei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsuxei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsuxei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsuxei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsuxei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsuxei16(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +---- + +[[overloaded-unit-stride-fault-only-first-loads]] +==== Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4_t __riscv_vle16ff(vbool64_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff(vbool32_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff(vbool16_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff(vbool8_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff(vbool4_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff(vbool2_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +---- + +=== BFloat16 Vector Loads and Stores Segment Intrinsics + +[[overloaded-vector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16(vbool64_t vm, const __bf16 *rs1, size_t vl); 
+vbfloat16mf4x4_t __riscv_vlseg4e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16(vbool4_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff(vbool64_t 
vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t 
__riscv_vlseg2e16ff(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- + +[[overloaded-vecrtor-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics + +[,c] +---- +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsseg5e16(__bf16 *rs1, vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsseg6e16(__bf16 *rs1, vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsseg7e16(__bf16 *rs1, vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsseg8e16(__bf16 *rs1, vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsseg5e16(__bf16 *rs1, vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsseg6e16(__bf16 *rs1, vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsseg7e16(__bf16 *rs1, vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsseg8e16(__bf16 *rs1, vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsseg5e16(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsseg6e16(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsseg7e16(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsseg8e16(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl); +void 
__riscv_vsseg2e16(__bf16 *rs1, vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void __riscv_vsseg2e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsseg5e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsseg6e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsseg7e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsseg8e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsseg5e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsseg6e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsseg7e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsseg8e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsseg5e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsseg6e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsseg7e16(vbool16_t vm, __bf16 *rs1, 
vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vsseg8e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool8_t vm, __bf16 *rs1, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool8_t vm, __bf16 *rs1, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool8_t vm, __bf16 *rs1, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool4_t vm, __bf16 *rs1, vbfloat16m4x2_t vs3, + size_t vl); +---- + +[[overloaded-vector-strided-segment-load]] +==== Vector Strided Segment Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t 
__riscv_vlsseg2e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[overloaded-vector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics + +[,c] +---- +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vssseg5e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vssseg6e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vssseg7e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vssseg8e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x3_t vs3, + size_t vl); +void 
__riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vssseg5e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vssseg6e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vssseg7e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vssseg8e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vssseg5e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vssseg6e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vssseg7e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vssseg8e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4x2_t vs3, + size_t vl); +// masked functions +void __riscv_vssseg2e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vssseg5e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vssseg6e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vssseg7e16(vbool64_t vm, __bf16 *rs1, 
ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vssseg8e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vssseg5e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vssseg6e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vssseg7e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vssseg8e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vssseg5e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vssseg6e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vssseg7e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vssseg8e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl); +---- 
+ +[[overloaded-vector-indexed-segment-load]] +==== Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t 
__riscv_vloxseg2ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16(const __bf16 
*rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16(vbool32_t vm, const 
__bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); 
+vbfloat16mf2x3_t __riscv_vluxseg3ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +---- + +[[overloaded-vector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics + +[,c] +---- +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, 
vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x3_t vs3, + 
size_t vl); +void __riscv_vsoxseg4ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void 
__riscv_vsuxseg7ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +// masked functions +void __riscv_vsoxseg2ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void 
__riscv_vsoxseg8ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsoxseg2ei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + 
vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +---- + +=== BFloat16 Convert Intrinsics + 
+[[overloaded-bf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f(vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f(vbool64_t vm, vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vbool32_t vm, vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vbool16_t vm, vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vbool8_t vm, vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vbool4_t vm, vfloat32m8_t vs2, size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f(vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vfloat32m8_t vs2, unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f(vbool64_t vm, vfloat32mf2_t vs2, + unsigned int frm, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vbool32_t vm, vfloat32m1_t vs2, + unsigned int frm, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vbool16_t vm, vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vbool8_t vm, vfloat32m4_t vs2, + unsigned int frm, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vbool4_t vm, vfloat32m8_t vs2, + unsigned int frm, size_t vl); +---- + +[[overloaded-bf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwcvtbf16_f(vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t 
__riscv_vfwcvtbf16_f(vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f(vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f(vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f(vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f(vbool64_t vm, vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f(vbool32_t vm, vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f(vbool16_t vm, vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f(vbool8_t vm, vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f(vbool4_t vm, vbfloat16m4_t vs2, size_t vl); +---- + +=== BFloat16 Arithmetic Intrinsics + +[[overloaded-bf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t 
__riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +---- + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[overloaded-reinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics + +[,c] +---- +// Reinterpret between different type under the same SEW/LMUL +vbfloat16mf4_t __riscv_vreinterpret_bf16mf4(vint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_bf16mf2(vint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_bf16m1(vint16m1_t 
src); +vbfloat16m2_t __riscv_vreinterpret_bf16m2(vint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_bf16m4(vint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_bf16m8(vint16m8_t src); +vbfloat16mf4_t __riscv_vreinterpret_bf16mf4(vuint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_bf16mf2(vuint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_bf16m1(vuint16m1_t src); +vbfloat16m2_t __riscv_vreinterpret_bf16m2(vuint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_bf16m4(vuint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_bf16m8(vuint16m8_t src); +vint16mf4_t __riscv_vreinterpret_i16mf4(vbfloat16mf4_t src); +vint16mf2_t __riscv_vreinterpret_i16mf2(vbfloat16mf2_t src); +vint16m1_t __riscv_vreinterpret_i16m1(vbfloat16m1_t src); +vint16m2_t __riscv_vreinterpret_i16m2(vbfloat16m2_t src); +vint16m4_t __riscv_vreinterpret_i16m4(vbfloat16m4_t src); +vint16m8_t __riscv_vreinterpret_i16m8(vbfloat16m8_t src); +vuint16mf4_t __riscv_vreinterpret_u16mf4(vbfloat16mf4_t src); +vuint16mf2_t __riscv_vreinterpret_u16mf2(vbfloat16mf2_t src); +vuint16m1_t __riscv_vreinterpret_u16m1(vbfloat16m1_t src); +vuint16m2_t __riscv_vreinterpret_u16m2(vbfloat16m2_t src); +vuint16m4_t __riscv_vreinterpret_u16m4(vbfloat16m4_t src); +vuint16m8_t __riscv_vreinterpret_u16m8(vbfloat16m8_t src); +---- + +[[overloaded-vector-lmul-extensionn]] +==== Vector LMUL Extension Intrinsics + +[,c] +---- +vbfloat16mf2_t __riscv_vlmul_ext_bf16mf2(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_bf16m1(vbfloat16mf4_t value); +vbfloat16m2_t __riscv_vlmul_ext_bf16m2(vbfloat16mf4_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16mf4_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_bf16m1(vbfloat16mf2_t value); +vbfloat16m2_t __riscv_vlmul_ext_bf16m2(vbfloat16mf2_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16mf2_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16mf2_t value); +vbfloat16m2_t 
__riscv_vlmul_ext_bf16m2(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16m1_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16m4_t value); +---- + +[[overloaded-vector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16mf2_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16m1_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m1_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16m2_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m2_t value); +vbfloat16m1_t __riscv_vlmul_trunc_bf16m1(vbfloat16m2_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16m4_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m4_t value); +vbfloat16m1_t __riscv_vlmul_trunc_bf16m1(vbfloat16m4_t value); +vbfloat16m2_t __riscv_vlmul_trunc_bf16m2(vbfloat16m4_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16m8_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m8_t value); +vbfloat16m1_t __riscv_vlmul_trunc_bf16m1(vbfloat16m8_t value); +vbfloat16m2_t __riscv_vlmul_trunc_bf16m2(vbfloat16m8_t value); +vbfloat16m4_t __riscv_vlmul_trunc_bf16m4(vbfloat16m8_t value); +---- + +[[overloaded-vector-initialization]] +==== Vector Initialization Intrinsics +Intrinsics here don't have an overloaded variant. 
+ +[[overloaded-vector-insertion]] +==== Vector Insertion Intrinsics + +[,c] +---- +vbfloat16m2_t __riscv_vset(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m8_t __riscv_vset(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value); +vbfloat16mf4x2_t __riscv_vset(vbfloat16mf4x2_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x3_t __riscv_vset(vbfloat16mf4x3_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x4_t __riscv_vset(vbfloat16mf4x4_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x5_t __riscv_vset(vbfloat16mf4x5_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x6_t __riscv_vset(vbfloat16mf4x6_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x7_t __riscv_vset(vbfloat16mf4x7_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x8_t __riscv_vset(vbfloat16mf4x8_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf2x2_t __riscv_vset(vbfloat16mf2x2_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x3_t __riscv_vset(vbfloat16mf2x3_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x4_t __riscv_vset(vbfloat16mf2x4_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x5_t __riscv_vset(vbfloat16mf2x5_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x6_t __riscv_vset(vbfloat16mf2x6_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x7_t __riscv_vset(vbfloat16mf2x7_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x8_t __riscv_vset(vbfloat16mf2x8_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16m1x2_t __riscv_vset(vbfloat16m1x2_t dest, size_t index, + vbfloat16m1_t value); 
+vbfloat16m1x3_t __riscv_vset(vbfloat16m1x3_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x4_t __riscv_vset(vbfloat16m1x4_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x5_t __riscv_vset(vbfloat16m1x5_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x6_t __riscv_vset(vbfloat16m1x6_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x7_t __riscv_vset(vbfloat16m1x7_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x8_t __riscv_vset(vbfloat16m1x8_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m2x2_t __riscv_vset(vbfloat16m2x2_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m2x3_t __riscv_vset(vbfloat16m2x3_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m2x4_t __riscv_vset(vbfloat16m2x4_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m4x2_t __riscv_vset(vbfloat16m4x2_t dest, size_t index, + vbfloat16m4_t value); +---- + +[[overloaded-vector-extraction]] +==== Vector Extraction Intrinsics + +[,c] +---- +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m2_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m4_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m8_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m4_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m8_t src, size_t index); +vbfloat16m4_t __riscv_vget_bf16m4(vbfloat16m8_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x2_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x3_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x4_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x5_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x6_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x7_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x8_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x2_t src, size_t index); 
+vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x3_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x4_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x5_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x6_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x7_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x8_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x2_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x3_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x4_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x5_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x6_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x7_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x8_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m2x2_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m2x3_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m2x4_t src, size_t index); +vbfloat16m4_t __riscv_vget_bf16m4(vbfloat16m4x2_t src, size_t index); +---- + +[[overloaded-vector-creation]] +==== Vector Creation Intrinsics +Intrinsics here don't have an overloaded variant. 
diff --git a/auto-generated/bfloat16/overloaded_intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc b/auto-generated/bfloat16/overloaded_intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc new file mode 100644 index 000000000..67cac3d50 --- /dev/null +++ b/auto-generated/bfloat16/overloaded_intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc @@ -0,0 +1,202 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[overloaded-bf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4_t __riscv_vle16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16(vbool4_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16(vbool2_t vm, const __bf16 *rs1, size_t vl); +---- + +[[overloaded-bf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics + +[,c] +---- +void __riscv_vse16(__bf16 *rs1, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m1_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m2_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m4_t vs3, size_t vl); +void __riscv_vse16(__bf16 *rs1, vbfloat16m8_t vs3, size_t vl); +// masked functions +void __riscv_vse16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vse16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vse16(vbool16_t vm, __bf16 *rs1, vbfloat16m1_t vs3, size_t vl); +void __riscv_vse16(vbool8_t vm, __bf16 *rs1, vbfloat16m2_t vs3, size_t vl); +void __riscv_vse16(vbool4_t vm, __bf16 *rs1, vbfloat16m4_t vs3, size_t vl); +void __riscv_vse16(vbool2_t vm, __bf16 *rs1, vbfloat16m8_t vs3, 
size_t vl); +---- + +[[overloaded-vector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4_t __riscv_vlse16(vbool64_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16(vbool32_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16(vbool16_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16(vbool8_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16(vbool4_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16(vbool2_t vm, const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +---- + +[[overloaded-vector-strided-store]] +==== Vector Strided Store Intrinsics + +[,c] +---- +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, size_t vl); +void __riscv_vsse16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, size_t vl); +// masked functions +void __riscv_vsse16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsse16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsse16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsse16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsse16(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsse16(vbool2_t vm, __bf16 *rs1, ptrdiff_t rs2, vbfloat16m8_t vs3, + size_t vl); +---- + +[[overloaded-vector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16(const __bf16 
*rs1, vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16(const __bf16 *rs1, vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16(const __bf16 *rs1, vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16(const __bf16 *rs1, vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16(const __bf16 *rs1, vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16(const __bf16 *rs1, vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16(const __bf16 *rs1, vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16(const __bf16 *rs1, vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16(const __bf16 *rs1, vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vluxei16(const __bf16 *rs1, vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16(const __bf16 *rs1, vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16(const __bf16 *rs1, vuint16m8_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16(vbool16_t vm, const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16(vbool8_t vm, const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16(vbool4_t vm, const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16(vbool2_t vm, const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16(vbool16_t vm, const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16(vbool8_t vm, const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16(vbool4_t vm, const __bf16 *rs1, vuint16m4_t rs2, + size_t 
vl); +vbfloat16m8_t __riscv_vluxei16(vbool2_t vm, const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +---- + +[[overloaded-vector-indexed-store]] +==== Vector Indexed Store Intrinsics + +[,c] +---- +void __riscv_vsoxei16(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsoxei16(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16mf4_t rs2, vbfloat16mf4_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16mf2_t rs2, vbfloat16mf2_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m1_t rs2, vbfloat16m1_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m2_t rs2, vbfloat16m2_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m4_t rs2, vbfloat16m4_t vs3, + size_t vl); +void __riscv_vsuxei16(__bf16 *rs1, vuint16m8_t rs2, vbfloat16m8_t vs3, + size_t vl); +// masked functions +void __riscv_vsoxei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsoxei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsoxei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsoxei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsoxei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsoxei16(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +void __riscv_vsuxei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t rs2, + vbfloat16mf4_t vs3, size_t vl); +void __riscv_vsuxei16(vbool32_t vm, 
__bf16 *rs1, vuint16mf2_t rs2, + vbfloat16mf2_t vs3, size_t vl); +void __riscv_vsuxei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t rs2, + vbfloat16m1_t vs3, size_t vl); +void __riscv_vsuxei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t rs2, + vbfloat16m2_t vs3, size_t vl); +void __riscv_vsuxei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t rs2, + vbfloat16m4_t vs3, size_t vl); +void __riscv_vsuxei16(vbool2_t vm, __bf16 *rs1, vuint16m8_t rs2, + vbfloat16m8_t vs3, size_t vl); +---- + +[[overloaded-unit-stride-fault-only-first-loads]] +==== Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4_t __riscv_vle16ff(vbool64_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff(vbool32_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff(vbool16_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff(vbool8_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff(vbool4_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff(vbool2_t vm, const __bf16 *rs1, size_t *new_vl, + size_t vl); +---- diff --git a/auto-generated/bfloat16/overloaded_intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc b/auto-generated/bfloat16/overloaded_intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc new file mode 100644 index 000000000..06d4d0a39 --- /dev/null +++ b/auto-generated/bfloat16/overloaded_intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc @@ -0,0 +1,750 @@ + +=== BFloat16 Vector Loads and Stores Segment Intrinsics + +[[overloaded-vector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t 
__riscv_vlseg4e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16(vbool64_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16(vbool32_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16(vbool16_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16(vbool8_t vm, const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16(vbool4_t vm, const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff(vbool64_t vm, const __bf16 
*rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff(vbool64_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff(vbool32_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff(vbool16_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff(vbool8_t vm, 
const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff(vbool8_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff(vbool4_t vm, const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- + +[[overloaded-vector-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics + +[,c] +---- +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsseg5e16(__bf16 *rs1, vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsseg6e16(__bf16 *rs1, vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsseg7e16(__bf16 *rs1, vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsseg8e16(__bf16 *rs1, vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsseg5e16(__bf16 *rs1, vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsseg6e16(__bf16 *rs1, vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsseg7e16(__bf16 *rs1, vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsseg8e16(__bf16 *rs1, vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsseg5e16(__bf16 *rs1, vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsseg6e16(__bf16 *rs1, vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsseg7e16(__bf16 *rs1, vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsseg8e16(__bf16 *rs1, vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsseg2e16(__bf16 *rs1, 
vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsseg3e16(__bf16 *rs1, vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsseg4e16(__bf16 *rs1, vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsseg2e16(__bf16 *rs1, vbfloat16m4x2_t vs3, size_t vl); +// masked functions +void __riscv_vsseg2e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsseg5e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsseg6e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsseg7e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsseg8e16(vbool64_t vm, __bf16 *rs1, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsseg5e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsseg6e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsseg7e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsseg8e16(vbool32_t vm, __bf16 *rs1, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsseg5e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsseg6e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsseg7e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x7_t vs3, + size_t vl); 
+void __riscv_vsseg8e16(vbool16_t vm, __bf16 *rs1, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool8_t vm, __bf16 *rs1, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsseg3e16(vbool8_t vm, __bf16 *rs1, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsseg4e16(vbool8_t vm, __bf16 *rs1, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsseg2e16(vbool4_t vm, __bf16 *rs1, vbfloat16m4x2_t vs3, + size_t vl); +---- + +[[overloaded-vector-strided-segment-load]] +==== Vector Strided Segment Load Intrinsics + +[,c] +---- +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16(vbool64_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16(vbool32_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16(vbool16_t vm, const 
__bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16(vbool16_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16(vbool8_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16(vbool4_t vm, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[overloaded-vector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics + +[,c] +---- +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vssseg5e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vssseg6e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vssseg7e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vssseg8e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, 
vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vssseg5e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vssseg6e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vssseg7e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vssseg8e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vssseg5e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vssseg6e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vssseg7e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vssseg8e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vssseg3e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vssseg4e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vssseg2e16(__bf16 *rs1, ptrdiff_t rs2, vbfloat16m4x2_t vs3, + size_t vl); +// masked functions +void __riscv_vssseg2e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vssseg5e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vssseg6e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vssseg7e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x7_t vs3, size_t vl); 
+void __riscv_vssseg8e16(vbool64_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vssseg5e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vssseg6e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vssseg7e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vssseg8e16(vbool32_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vssseg5e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vssseg6e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vssseg7e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vssseg8e16(vbool16_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vssseg3e16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vssseg4e16(vbool8_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vssseg2e16(vbool4_t vm, __bf16 *rs1, ptrdiff_t rs2, + vbfloat16m4x2_t vs3, size_t vl); +---- + +[[overloaded-vector-indexed-segment-load]] +==== 
Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16m2_t 
rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16(const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16(const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t 
__riscv_vluxseg7ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16(const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16(const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16(const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); 
+vbfloat16m1x2_t __riscv_vloxseg2ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16(vbool64_t vm, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16(vbool32_t 
vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16(vbool32_t vm, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16(vbool16_t vm, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16(vbool8_t vm, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16(vbool4_t vm, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +---- + +[[overloaded-vector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics + +[,c] +---- +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void 
__riscv_vsoxseg4ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsoxseg5ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsoxseg6ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsoxseg7ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x7_t vs3, + size_t vl); +void __riscv_vsoxseg8ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsoxseg3ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsoxseg4ei16(__bf16 *rs1, 
vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsoxseg2ei16(__bf16 *rs1, vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16(__bf16 *rs1, vuint16mf4_t vs2, vbfloat16mf4x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x7_t vs3, + size_t vl); +void __riscv_vsuxseg8ei16(__bf16 *rs1, vuint16mf2_t vs2, vbfloat16mf2x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x4_t vs3, + size_t vl); +void __riscv_vsuxseg5ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x5_t vs3, + size_t vl); +void __riscv_vsuxseg6ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x6_t vs3, + size_t vl); +void __riscv_vsuxseg7ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x7_t vs3, + 
size_t vl); +void __riscv_vsuxseg8ei16(__bf16 *rs1, vuint16m1_t vs2, vbfloat16m1x8_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x2_t vs3, + size_t vl); +void __riscv_vsuxseg3ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x3_t vs3, + size_t vl); +void __riscv_vsuxseg4ei16(__bf16 *rs1, vuint16m2_t vs2, vbfloat16m2x4_t vs3, + size_t vl); +void __riscv_vsuxseg2ei16(__bf16 *rs1, vuint16m4_t vs2, vbfloat16m4x2_t vs3, + size_t vl); +// masked functions +void __riscv_vsoxseg2ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t 
vl); +void __riscv_vsoxseg2ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsoxseg5ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsoxseg6ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsoxseg7ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsoxseg8ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsoxseg2ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsoxseg3ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsoxseg4ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsoxseg2ei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16(vbool64_t vm, __bf16 *rs1, vuint16mf4_t vs2, + vbfloat16mf4x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + 
vbfloat16mf2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16(vbool32_t vm, __bf16 *rs1, vuint16mf2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x4_t vs3, size_t vl); +void __riscv_vsuxseg5ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x5_t vs3, size_t vl); +void __riscv_vsuxseg6ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x6_t vs3, size_t vl); +void __riscv_vsuxseg7ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x7_t vs3, size_t vl); +void __riscv_vsuxseg8ei16(vbool16_t vm, __bf16 *rs1, vuint16m1_t vs2, + vbfloat16m1x8_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x2_t vs3, size_t vl); +void __riscv_vsuxseg3ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x3_t vs3, size_t vl); +void __riscv_vsuxseg4ei16(vbool8_t vm, __bf16 *rs1, vuint16m2_t vs2, + vbfloat16m2x4_t vs3, size_t vl); +void __riscv_vsuxseg2ei16(vbool4_t vm, __bf16 *rs1, vuint16m4_t vs2, + vbfloat16m4x2_t vs3, size_t vl); +---- diff --git a/auto-generated/bfloat16/overloaded_intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc 
b/auto-generated/bfloat16/overloaded_intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc new file mode 100644 index 000000000..151c6c4ec --- /dev/null +++ b/auto-generated/bfloat16/overloaded_intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc @@ -0,0 +1,59 @@ + +=== BFloat16 Convert Intrinsics + +[[overloaded-bf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f(vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f(vbool64_t vm, vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vbool32_t vm, vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vbool16_t vm, vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vbool8_t vm, vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vbool4_t vm, vfloat32m8_t vs2, size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f(vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vfloat32m8_t vs2, unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f(vbool64_t vm, vfloat32mf2_t vs2, + unsigned int frm, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f(vbool32_t vm, vfloat32m1_t vs2, + unsigned int frm, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f(vbool16_t vm, vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f(vbool8_t vm, vfloat32m4_t vs2, + unsigned int frm, 
size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f(vbool4_t vm, vfloat32m8_t vs2, + unsigned int frm, size_t vl); +---- + +[[overloaded-bf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwcvtbf16_f(vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f(vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f(vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f(vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f(vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f(vbool64_t vm, vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f(vbool32_t vm, vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f(vbool16_t vm, vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f(vbool8_t vm, vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f(vbool4_t vm, vbfloat16m4_t vs2, size_t vl); +---- diff --git a/auto-generated/bfloat16/overloaded_intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc b/auto-generated/bfloat16/overloaded_intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc new file mode 100644 index 000000000..f62b14fba --- /dev/null +++ b/auto-generated/bfloat16/overloaded_intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc @@ -0,0 +1,113 @@ + +=== BFloat16 Arithmetic Intrinsics + +[[overloaded-bf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, vbfloat16m1_t vs1, + 
vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, 
vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); 
+vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +---- diff --git a/auto-generated/bfloat16/overloaded_intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc b/auto-generated/bfloat16/overloaded_intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc new file mode 100644 index 000000000..70ab53219 --- /dev/null +++ b/auto-generated/bfloat16/overloaded_intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc @@ -0,0 +1,193 @@ + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[overloaded-reinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics + +[,c] +---- +// Reinterpret between different type under the same SEW/LMUL +vbfloat16mf4_t __riscv_vreinterpret_bf16mf4(vint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_bf16mf2(vint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_bf16m1(vint16m1_t src); +vbfloat16m2_t __riscv_vreinterpret_bf16m2(vint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_bf16m4(vint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_bf16m8(vint16m8_t src); +vbfloat16mf4_t __riscv_vreinterpret_bf16mf4(vuint16mf4_t src); +vbfloat16mf2_t __riscv_vreinterpret_bf16mf2(vuint16mf2_t src); +vbfloat16m1_t __riscv_vreinterpret_bf16m1(vuint16m1_t src); +vbfloat16m2_t __riscv_vreinterpret_bf16m2(vuint16m2_t src); +vbfloat16m4_t __riscv_vreinterpret_bf16m4(vuint16m4_t src); +vbfloat16m8_t __riscv_vreinterpret_bf16m8(vuint16m8_t src); +vint16mf4_t __riscv_vreinterpret_i16mf4(vbfloat16mf4_t src); +vint16mf2_t __riscv_vreinterpret_i16mf2(vbfloat16mf2_t src); +vint16m1_t __riscv_vreinterpret_i16m1(vbfloat16m1_t src); +vint16m2_t __riscv_vreinterpret_i16m2(vbfloat16m2_t src); +vint16m4_t __riscv_vreinterpret_i16m4(vbfloat16m4_t src); +vint16m8_t 
__riscv_vreinterpret_i16m8(vbfloat16m8_t src); +vuint16mf4_t __riscv_vreinterpret_u16mf4(vbfloat16mf4_t src); +vuint16mf2_t __riscv_vreinterpret_u16mf2(vbfloat16mf2_t src); +vuint16m1_t __riscv_vreinterpret_u16m1(vbfloat16m1_t src); +vuint16m2_t __riscv_vreinterpret_u16m2(vbfloat16m2_t src); +vuint16m4_t __riscv_vreinterpret_u16m4(vbfloat16m4_t src); +vuint16m8_t __riscv_vreinterpret_u16m8(vbfloat16m8_t src); +---- + +[[overloaded-vector-lmul-extensionn]] +==== Vector LMUL Extension Intrinsics + +[,c] +---- +vbfloat16mf2_t __riscv_vlmul_ext_bf16mf2(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_bf16m1(vbfloat16mf4_t value); +vbfloat16m2_t __riscv_vlmul_ext_bf16m2(vbfloat16mf4_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16mf4_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16mf4_t value); +vbfloat16m1_t __riscv_vlmul_ext_bf16m1(vbfloat16mf2_t value); +vbfloat16m2_t __riscv_vlmul_ext_bf16m2(vbfloat16mf2_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16mf2_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16mf2_t value); +vbfloat16m2_t __riscv_vlmul_ext_bf16m2(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16m1_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16m1_t value); +vbfloat16m4_t __riscv_vlmul_ext_bf16m4(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16m2_t value); +vbfloat16m8_t __riscv_vlmul_ext_bf16m8(vbfloat16m4_t value); +---- + +[[overloaded-vector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16mf2_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16m1_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m1_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16m2_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m2_t value); +vbfloat16m1_t __riscv_vlmul_trunc_bf16m1(vbfloat16m2_t value); +vbfloat16mf4_t 
__riscv_vlmul_trunc_bf16mf4(vbfloat16m4_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m4_t value); +vbfloat16m1_t __riscv_vlmul_trunc_bf16m1(vbfloat16m4_t value); +vbfloat16m2_t __riscv_vlmul_trunc_bf16m2(vbfloat16m4_t value); +vbfloat16mf4_t __riscv_vlmul_trunc_bf16mf4(vbfloat16m8_t value); +vbfloat16mf2_t __riscv_vlmul_trunc_bf16mf2(vbfloat16m8_t value); +vbfloat16m1_t __riscv_vlmul_trunc_bf16m1(vbfloat16m8_t value); +vbfloat16m2_t __riscv_vlmul_trunc_bf16m2(vbfloat16m8_t value); +vbfloat16m4_t __riscv_vlmul_trunc_bf16m4(vbfloat16m8_t value); +---- + +[[overloaded-vector-initialization]] +==== Vector Initialization Intrinsics +Intrinsics here don't have an overloaded variant. + +[[overloaded-vector-insertion]] +==== Vector Insertion Intrinsics + +[,c] +---- +vbfloat16m2_t __riscv_vset(vbfloat16m2_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset(vbfloat16m4_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m4_t __riscv_vset(vbfloat16m4_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset(vbfloat16m8_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m8_t __riscv_vset(vbfloat16m8_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m8_t __riscv_vset(vbfloat16m8_t dest, size_t index, + vbfloat16m4_t value); +vbfloat16mf4x2_t __riscv_vset(vbfloat16mf4x2_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x3_t __riscv_vset(vbfloat16mf4x3_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x4_t __riscv_vset(vbfloat16mf4x4_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x5_t __riscv_vset(vbfloat16mf4x5_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x6_t __riscv_vset(vbfloat16mf4x6_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x7_t __riscv_vset(vbfloat16mf4x7_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf4x8_t __riscv_vset(vbfloat16mf4x8_t dest, size_t index, + vbfloat16mf4_t value); +vbfloat16mf2x2_t 
__riscv_vset(vbfloat16mf2x2_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x3_t __riscv_vset(vbfloat16mf2x3_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x4_t __riscv_vset(vbfloat16mf2x4_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x5_t __riscv_vset(vbfloat16mf2x5_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x6_t __riscv_vset(vbfloat16mf2x6_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x7_t __riscv_vset(vbfloat16mf2x7_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16mf2x8_t __riscv_vset(vbfloat16mf2x8_t dest, size_t index, + vbfloat16mf2_t value); +vbfloat16m1x2_t __riscv_vset(vbfloat16m1x2_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x3_t __riscv_vset(vbfloat16m1x3_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x4_t __riscv_vset(vbfloat16m1x4_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x5_t __riscv_vset(vbfloat16m1x5_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x6_t __riscv_vset(vbfloat16m1x6_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x7_t __riscv_vset(vbfloat16m1x7_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m1x8_t __riscv_vset(vbfloat16m1x8_t dest, size_t index, + vbfloat16m1_t value); +vbfloat16m2x2_t __riscv_vset(vbfloat16m2x2_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m2x3_t __riscv_vset(vbfloat16m2x3_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m2x4_t __riscv_vset(vbfloat16m2x4_t dest, size_t index, + vbfloat16m2_t value); +vbfloat16m4x2_t __riscv_vset(vbfloat16m4x2_t dest, size_t index, + vbfloat16m4_t value); +---- + +[[overloaded-vector-extraction]] +==== Vector Extraction Intrinsics + +[,c] +---- +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m2_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m4_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m8_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m4_t src, size_t index); 
+vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m8_t src, size_t index); +vbfloat16m4_t __riscv_vget_bf16m4(vbfloat16m8_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x2_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x3_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x4_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x5_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x6_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x7_t src, size_t index); +vbfloat16mf4_t __riscv_vget_bf16mf4(vbfloat16mf4x8_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x2_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x3_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x4_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x5_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x6_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x7_t src, size_t index); +vbfloat16mf2_t __riscv_vget_bf16mf2(vbfloat16mf2x8_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x2_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x3_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x4_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x5_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x6_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x7_t src, size_t index); +vbfloat16m1_t __riscv_vget_bf16m1(vbfloat16m1x8_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m2x2_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m2x3_t src, size_t index); +vbfloat16m2_t __riscv_vget_bf16m2(vbfloat16m2x4_t src, size_t index); +vbfloat16m4_t __riscv_vget_bf16m4(vbfloat16m4x2_t src, size_t index); +---- + +[[overloaded-vector-creation]] 
+==== Vector Creation Intrinsics +Intrinsics here don't have an overloaded variant. diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vfncvtbf16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vfncvtbf16.c new file mode 100644 index 000000000..c408c7a42 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vfncvtbf16.c @@ -0,0 +1,243 @@ +#include +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_tu(vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_tu(vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_tu(vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_tu(vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_tu(vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_tum(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_tum(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_tum(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_tum(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tum(vbool4_t 
vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_tum(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_tumu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_tumu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_tumu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_mu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_mu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_mu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_mu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_mu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t 
test_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tum(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16mf4_t 
test_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + 
return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, + vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vfwcvtbf16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vfwcvtbf16.c new file mode 100644 index 000000000..9ce24c6ba --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vfwcvtbf16.c @@ -0,0 +1,102 @@ +#include +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_tu(vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_tu(vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_tu(vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_tu(vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_tu(vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_tum(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_tum(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_tum(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_tum(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t 
vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_tum(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_tumu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_tumu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_tumu(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_tumu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_tumu(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_mu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_mu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_mu(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_mu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_mu(vm, vd, vs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vfwmaccbf16.c 
b/auto-generated/bfloat16/policy_funcs/api-testing/vfwmaccbf16.c new file mode 100644 index 000000000..bc8c20900 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vfwmaccbf16.c @@ -0,0 +1,496 @@ +#include +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_tu(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_tu(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_tu(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_tu(vd, vs1, vs2, vl); +} + 
+vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vv_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vv_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vv_f32mf2_rm_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vv_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + 
+vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_tumu(vm, vd, vs1, vs2, + __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_tumu(vm, vd, vs1, vs2, + __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_tumu(vm, vd, vs1, vs2, 
__RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_mu(vm, vd, vs1, vs2, 
__RISCV_FRM_RNE, + vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, + vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vle16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vle16.c new file mode 100644 index 000000000..d147cdfec --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vle16.c @@ -0,0 +1,122 @@ +#include +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16mf4_tu(vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16mf2_tu(vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16m1_tu(vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16m2_tu(vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16m4_tu(vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_v_bf16m8_tu(vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4_tum(vm, vd, 
rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2_tum(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1_tum(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_tum(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_tum(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_tumu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4_mu(vm, vd, rs1, vl); 
+} + +vbfloat16mf2_t test_vle16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2_mu(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1_mu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_mu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_mu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vle16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vle16ff.c new file mode 100644 index 000000000..200105f94 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vle16ff.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m1_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m8_t 
test_vle16ff_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m1_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m1_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + 
const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16mf2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m1_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_v_bf16m8_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxei16.c new file mode 100644 index 000000000..75ab2d987 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxei16.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16mf4_t 
test_vloxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m4_tum(vm, vd, rs1, rs2, vl); 
+} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t 
test_vloxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg2ei16.c new file mode 100644 index 000000000..0ed314d13 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg2ei16.c @@ -0,0 +1,139 @@ +#include +#include + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return 
__riscv_vloxseg2ei16_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg3ei16.c new file mode 100644 index 000000000..7939b8fb1 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg3ei16.c @@ -0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t 
test_vloxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const 
__bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg4ei16.c new file mode 100644 index 000000000..d0b103679 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg4ei16.c @@ -0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return 
__riscv_vloxseg4ei16_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_mu(vm, vd, rs1, rs2, 
vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg5ei16.c new file mode 100644 index 000000000..3c915d4d9 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg5ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t 
test_vloxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg6ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg6ei16.c new file mode 100644 index 000000000..55ab43069 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg6ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t 
test_vloxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + 
const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg7ei16.c new file mode 100644 index 000000000..c430c1e47 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg7ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + 
vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg8ei16.c new file mode 100644 index 000000000..564807d33 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vloxseg8ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { 
+ return __riscv_vloxseg8ei16_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlse16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlse16.c new file mode 100644 index 000000000..ece6f8cae --- /dev/null +++ 
b/auto-generated/bfloat16/policy_funcs/api-testing/vlse16.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return 
__riscv_vlse16_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t 
test_vlse16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg2e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg2e16.c new file mode 100644 index 000000000..f2fbf24f5 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg2e16.c @@ -0,0 +1,108 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_tu(vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_tu(vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_tu(vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_tu(vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_tu(vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return 
__riscv_vlseg2e16_v_bf16mf2x2_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_tum(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + 
return __riscv_vlseg2e16_v_bf16m1x2_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_mu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg2e16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg2e16ff.c new file mode 100644 index 000000000..da7df9f7f --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg2e16ff.c @@ -0,0 +1,132 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const 
__bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg2e16ff_v_bf16mf4x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg3e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg3e16.c new file mode 100644 index 000000000..550192ec0 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg3e16.c @@ -0,0 +1,88 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_tu(vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_tu(vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_tu(vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_tu(vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return 
__riscv_vlseg3e16_v_bf16mf4x3_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { 
+ return __riscv_vlseg3e16_v_bf16m2x3_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg3e16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg3e16ff.c new file mode 100644 index 000000000..6be408016 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg3e16ff.c @@ -0,0 +1,107 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_tum(vm, vd, rs1, new_vl, vl); +} + 
+vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg4e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg4e16.c new file mode 100644 index 000000000..ba875d221 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg4e16.c @@ -0,0 +1,88 @@ +#include 
+#include + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_tu(vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_tu(vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_tu(vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_tu(vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t 
test_vlseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg4e16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg4e16ff.c new file mode 100644 index 000000000..792e1be03 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg4e16ff.c @@ -0,0 +1,107 @@ +#include +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t 
test_vlseg4e16ff_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, 
+ const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg5e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg5e16.c new file mode 100644 index 000000000..37a4cdad6 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg5e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_tu(vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_tu(vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_tu(vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t 
vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg5e16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg5e16ff.c new file mode 100644 index 000000000..04d061397 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg5e16ff.c @@ -0,0 +1,82 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t 
*new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg6e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg6e16.c new file mode 100644 index 000000000..143635f11 --- 
/dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg6e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_tu(vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_tu(vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_tu(vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 
*rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg6e16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg6e16ff.c new file mode 100644 index 000000000..722c767fe --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg6e16ff.c @@ -0,0 +1,82 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_tumu(vm, vd, rs1, 
new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg7e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg7e16.c new file mode 100644 index 000000000..cfc5711dd --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg7e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_tu(vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_tu(vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_tu(vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + 
return __riscv_vlseg7e16_v_bf16mf4x7_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg7e16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg7e16ff.c new file mode 100644 index 000000000..d53541c21 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg7e16ff.c @@ -0,0 +1,82 @@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg7e16ff_v_bf16mf4x7_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t 
test_vlseg7e16ff_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg8e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg8e16.c new file mode 100644 index 000000000..3294997eb --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg8e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_tu(vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_tu(vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_tu(vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, 
+ vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlseg8e16ff.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg8e16ff.c new file mode 100644 index 000000000..029dd6297 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlseg8e16ff.c @@ -0,0 +1,82 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t 
test_vlseg8e16ff_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg2e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg2e16.c new file mode 100644 index 000000000..e15d577ae --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg2e16.c @@ -0,0 +1,129 @@ +#include 
+#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t 
test_vlsseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return 
__riscv_vlsseg2e16_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg3e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg3e16.c new file mode 100644 index 000000000..65cbc96f4 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg3e16.c @@ -0,0 +1,105 @@ +#include +#include + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tumu(vbool64_t 
vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg4e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg4e16.c new file mode 100644 index 000000000..7721cf1d2 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg4e16.c @@ -0,0 +1,105 @@ +#include +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + 
ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const 
__bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg5e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg5e16.c new file mode 100644 index 000000000..d6df0b2bd --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg5e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_tu(vd, rs1, rs2, vl); 
+} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg6e16.c 
b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg6e16.c new file mode 100644 index 000000000..27c1cbd88 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg6e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg6e16_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg7e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg7e16.c new file mode 100644 index 000000000..872b2f0d0 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg7e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t 
test_vlsseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg8e16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg8e16.c new file mode 100644 index 000000000..cee5491c5 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vlsseg8e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t 
vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t 
test_vlsseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxei16.c new file mode 100644 index 000000000..2b61e3f6d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vluxei16.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, 
+ const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + 
size_t vl) { + return __riscv_vluxei16_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg2ei16.c new file mode 100644 index 000000000..4c4852bf6 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg2ei16.c @@ -0,0 +1,139 @@ +#include +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t 
vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return 
__riscv_vluxseg2ei16_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg3ei16.c new file mode 100644 index 000000000..2ddb3a2ff --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg3ei16.c @@ -0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return 
__riscv_vluxseg3ei16_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tumu(vbool16_t 
vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg4ei16.c new file mode 100644 index 000000000..c26f49f3d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg4ei16.c @@ -0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 
*rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return 
__riscv_vluxseg4ei16_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg5ei16.c new file mode 100644 index 000000000..10e15cfcf --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg5ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_tum(vm, 
vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg6ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg6ei16.c new file mode 100644 index 000000000..618ec0ca1 --- /dev/null +++ 
b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg6ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg7ei16.c new file mode 100644 index 000000000..aca74804f --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg7ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t 
test_vluxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg8ei16.c new file mode 100644 index 000000000..9c7f8a09e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/api-testing/vluxseg8ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + 
+vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + 
const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/intrinsic_funcs.adoc b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs.adoc new file mode 100644 index 000000000..37161ceff --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs.adoc @@ -0,0 +1,2886 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[policy-variant-bf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// 
masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +---- + +[[policy-variant-bf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-vector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t 
vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +---- + +[[policy-variant-vector-strided-store]] +==== Vector Strided Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-vector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t 
vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t 
__riscv_vluxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t 
__riscv_vluxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +---- + +[[policy-variant-vector-indexed-store]] +==== Vector Indexed Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-unit-stride-fault-only-first-loads]] +==== Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_tu(vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_tu(vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t 
__riscv_vle16ff_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +---- + +=== BFloat16 Vector Loads and Stores Segment Intrinsics + +[[policy-variant-vector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, 
+ const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t 
vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t 
__riscv_vlseg8e16ff_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x6_t 
__riscv_vlseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_tum(vbool16_t vm, 
+ vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t 
__riscv_vlseg5e16ff_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_tum(vbool4_t 
vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t 
__riscv_vlseg3e16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + 
const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, 
+ const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t 
__riscv_vlseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 
*rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t 
vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_mu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- + +[[policy-variant-vector-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics +Intrinsics here don't have a policy variant.
+ +[[policy-variant-vector-strided-segment-load]] +==== Vector Strided Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t 
__riscv_vlsseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + 
const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); 
+vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t 
__riscv_vlsseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_tumu(vbool8_t vm, + 
vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 
*rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_mu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[policy-variant-vector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-vector-indexed-segment-load]] +==== Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t 
vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t 
__riscv_vluxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + 
vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t 
rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t 
rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + 
vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t 
__riscv_vloxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + 
const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_tumu(vbool64_t 
vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); 
+vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const 
__bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t 
vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + 
vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, + 
vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +---- + +[[policy-variant-vector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics +Intrinsics here don't have a policy variant. + +=== BFloat16 Convert Intrinsics + +[[policy-variant-bf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_tum(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + 
vfloat32m1_t vs2, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_tumu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_tumu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_tumu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_mu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_mu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tum(vbool16_t vm, + 
vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tum(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tum(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_mu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_mu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_mu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +---- + +[[policy-variant-bf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t 
__riscv_vfwcvtbf16_f_f_v_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_tu(vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_tu(vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_tu(vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_tum(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_tumu(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, 
+ vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +---- + +=== BFloat16 Arithmetic Intrinsics + +[[policy-variant-bf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t 
__riscv_vfwmaccbf16_vf_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16_vv_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t 
__riscv_vfwmaccbf16_vf_f32m1_rm_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_tu(vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_tu(vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm_tu(vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_tum(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_tum(vbool64_t vm, + vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + 
unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t +__riscv_vfwmaccbf16_vv_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t +__riscv_vfwmaccbf16_vf_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned 
int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_mu(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_mu(vbool64_t vm, + vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +---- + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[policy-variant-reinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-lmul-extension]] +==== Vector LMUL Extension Intrinsics +Intrinsics here don't have a policy variant.
+ +[[policy-variant-vector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-initialization]] +==== Vector Initialization Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-insertion]] +==== Vector Insertion Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-extraction]] +==== Vector Extraction Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-creation]] +==== Vector Creation Intrinsics +Intrinsics here don't have a policy variant. diff --git a/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc new file mode 100644 index 000000000..7d99fcc30 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc @@ -0,0 +1,372 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[policy-variant-bf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); 
+vbfloat16m1_t __riscv_vle16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +---- + +[[policy-variant-bf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-vector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t 
vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vlse16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vlse16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vlse16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vlse16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vlse16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +---- + +[[policy-variant-vector-strided-store]] +==== Vector Strided Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-vector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t 
vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t 
__riscv_vluxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t 
__riscv_vluxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +---- + +[[policy-variant-vector-indexed-store]] +==== Vector Indexed Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-unit-stride-fault-only-first-loads]] +==== Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_tu(vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_tu(vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t 
__riscv_vle16ff_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +---- diff --git a/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc new file mode 100644 index 000000000..f67848b46 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc @@ -0,0 +1,1991 @@ + +=== BFloat16 
Vector Loads and Stores Segment Intrinsics + +[[policy-variant-vector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t 
__riscv_vlseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + 
const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t 
vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, 
size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t 
__riscv_vlseg2e16ff_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t 
__riscv_vlseg2e16ff_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + 
vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t 
vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_tumu(vbool16_t vm, 
+ vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_v_bf16mf4x8_mu(vbool64_t 
vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t 
__riscv_vlseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + 
const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_v_bf16m2x3_mu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl); +---- + +[[policy-variant-vector-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics +Intrinsics here don't have a policy variant.
+ +[[policy-variant-vector-strided-segment-load]] +==== Vector Strided Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t 
__riscv_vlsseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + 
const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); 
+vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t 
__riscv_vlsseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_tumu(vbool8_t vm, + 
vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 
*rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_v_bf16m2x3_mu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +---- + +[[policy-variant-vector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-vector-indexed-segment-load]] +==== Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t 
vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t 
__riscv_vluxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + 
vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t 
rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t 
rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + 
vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t 
__riscv_vloxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + 
const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_tumu(vbool64_t 
vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); 
+vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const 
__bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t 
vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + 
vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, + 
vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +---- + +[[policy-variant-vector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics +Intrinsics here don't have a policy variant. diff --git a/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc new file mode 100644 index 000000000..c807ad197 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc @@ -0,0 +1,220 @@ + +=== BFloat16 Convert Intrinsics + +[[policy-variant-bf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_tum(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t 
vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_tumu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_tumu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_tumu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_mu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_mu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +// masked 
functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tum(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tum(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tum(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_f_w_bf16m1_rm_mu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_f_w_bf16m2_rm_mu(vbool8_t vm, + vbfloat16m2_t 
vd, + vfloat32m4_t vs2, + unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_f_w_bf16m4_rm_mu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + unsigned int frm, + size_t vl); +---- + +[[policy-variant-bf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_tu(vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_tu(vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_tu(vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_tum(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_tumu(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// 
masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_f_v_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_f_v_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_f_v_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_f_v_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_f_v_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +---- diff --git a/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc new file mode 100644 index 000000000..15acd4a2c --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc @@ -0,0 +1,273 @@ + +=== BFloat16 Arithmetic Intrinsics + +[[policy-variant-bf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_tu(vfloat32m4_t vd, __bf16 vs1, 
+ vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); 
+vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16_vv_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_tu(vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_tu(vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm_tu(vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_tum(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_tum(vbool64_t vm, + vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t 
__riscv_vfwmaccbf16_vv_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t +__riscv_vfwmaccbf16_vv_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t +__riscv_vfwmaccbf16_vf_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t 
__riscv_vfwmaccbf16_vf_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vv_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_vv_f32mf2_rm_mu(vbool64_t vm, + vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_vf_f32mf2_rm_mu(vbool64_t vm, + vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vv_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_vf_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vv_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_vf_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vv_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_vf_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t 
__riscv_vfwmaccbf16_vv_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_vf_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +---- diff --git a/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc new file mode 100644 index 000000000..363b02828 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc @@ -0,0 +1,30 @@ + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[policy-variant-reinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-lmul-extensionn]] +==== Vector LMUL Extension Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-initialization]] +==== Vector Initialization Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-insertion]] +==== Vector Insertion Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-extraction]] +==== Vector Extraction Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-vector-creation]] +==== Vector Creation Intrinsics +Intrinsics here don't have a policy variant. 
diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfncvtbf16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfncvtbf16.c new file mode 100644 index 000000000..83ce220b6 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfncvtbf16.c @@ -0,0 +1,169 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tu(vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_tu(vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tu(vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_tu(vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tu(vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_tu(vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tu(vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_tu(vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tu(vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_tu(vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_tum(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_tum(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return 
__riscv_vfncvtbf16_f_f_w_bf16m1_tum(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_tum(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_tum(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_tumu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_tumu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_tumu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_mu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_mu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_mu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return 
__riscv_vfncvtbf16_f_f_w_bf16m2_mu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_mu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tu(vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tu(vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tu(vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vbool64_t vm, vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vbool32_t vm, vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tum(vbool16_t vm, vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tum(vbool8_t vm, vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t 
test_vfncvtbf16_f_f_w_bf16m4_rm_tum(vbool4_t vm, vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vbool64_t vm, vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vbool32_t vm, vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vbool16_t vm, vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vbool8_t vm, vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vbool4_t vm, vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vbool64_t vm, vbfloat16mf4_t vd, vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vbool32_t vm, vbfloat16mf2_t vd, vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_mu(vbool16_t vm, vbfloat16m1_t vd, vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m1_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_mu(vbool8_t vm, vbfloat16m2_t vd, vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m2_rm_mu(vm, vd, vs2, 
__RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_mu(vbool4_t vm, vbfloat16m4_t vd, vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_f_w_bf16m4_rm_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfwcvtbf16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfwcvtbf16.c new file mode 100644 index 000000000..8d122d46d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfwcvtbf16.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_tu(vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_tu(vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_tu(vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_tu(vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_tu(vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_tum(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs2, size_t vl) { + 
return __riscv_vfwcvtbf16_f_f_v_f32m1_tum(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_tum(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_tum(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_tum(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_tumu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_tumu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_tumu(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_tumu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_tumu(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32mf2_mu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m1_mu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m2_mu(vm, vd, 
vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m4_mu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_f_v_f32m8_mu(vm, vd, vs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfwmaccbf16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfwmaccbf16.c new file mode 100644 index 000000000..d75f88788 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vfwmaccbf16.c @@ -0,0 +1,329 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_tu(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tu(vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tu(vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tu(vfloat32m2_t vd, 
__bf16 vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tu(vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_tu(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_tu(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tu(vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_tu(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t 
test_vfwmaccbf16_vv_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tumu(vbool8_t vm, 
vfloat32m4_t vd, vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + 
return __riscv_vfwmaccbf16_vv_f32m4_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tu(vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tu(vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tu(vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_tu(vd, vs1, vs2, 
__RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tu(vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tu(vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, 
vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m2_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, 
size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32mf2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32mf2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m1_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m1_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) { + return 
__riscv_vfwmaccbf16_vv_f32m2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m2_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, vbfloat16m2_t vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m4_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m4_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, vbfloat16m4_t vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vv_f32m8_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_vf_f32m8_rm_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vle16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vle16.c new file mode 100644 index 000000000..b3fb167d5 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vle16.c @@ -0,0 +1,105 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4_tu(vd, rs1, vl); +} + +vbfloat16mf2_t 
test_vle16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2_tu(vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1_tu(vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_tu(vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_tu(vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_tu(vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2_tum(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1_tum(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_tum(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_tum(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2_tumu(vm, vd, 
rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_tumu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf4_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16mf2_mu(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m1_mu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m2_mu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m4_mu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vle16_v_bf16m8_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vle16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vle16ff.c new file mode 100644 index 000000000..56c726802 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vle16ff.c @@ -0,0 +1,105 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m1_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m1_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tum(vbool8_t vm, 
vbfloat16m2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m1_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16mf4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return 
__riscv_vle16ff_v_bf16mf2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m1_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vle16ff_v_bf16m8_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxei16.c new file mode 100644 index 000000000..cfd980a4c --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxei16.c @@ -0,0 +1,105 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m1_tu(vd, 
rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) 
{ + return __riscv_vloxei16_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg2ei16.c new file mode 100644 index 000000000..5c08a6033 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg2ei16.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return 
__riscv_vloxseg2ei16_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_mu(vbool32_t 
vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg3ei16.c new file mode 100644 index 000000000..7d0430ef0 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg3ei16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return 
__riscv_vloxseg3ei16_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, 
vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg4ei16.c new file mode 100644 index 000000000..a1efb7a50 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg4ei16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return 
__riscv_vloxseg4ei16_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, 
vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg5ei16.c new file mode 100644 index 000000000..4ab0d7765 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg5ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return 
__riscv_vloxseg5ei16_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg6ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg6ei16.c new file mode 100644 index 000000000..f70929941 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg6ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, 
vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg7ei16.c new file mode 100644 index 000000000..0acfa2ed3 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg7ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return 
__riscv_vloxseg7ei16_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const 
__bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg8ei16.c new file mode 100644 index 000000000..9be5f86ee --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vloxseg8ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_tum(vm, vd, rs1, 
rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlse16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlse16.c new file mode 100644 index 000000000..b8a27eb29 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlse16.c @@ -0,0 +1,105 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature 
+experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, ptrdiff_t 
rs2, size_t vl) { + return __riscv_vlse16_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_mu(vbool8_t vm, 
vbfloat16m2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg2e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg2e16.c new file mode 100644 index 000000000..9b875c7a7 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg2e16.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_tu(vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_tu(vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_tu(vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_tu(vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_tu(vd, rs1, vl); +} + +vbfloat16mf4x2_t 
test_vlseg2e16_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_tum(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf4x2_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t 
test_vlseg2e16_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16mf2x2_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m1x2_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m2x2_mu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_v_bf16m4x2_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg2e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg2e16ff.c new file mode 100644 index 000000000..a3fcb918e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg2e16ff.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t 
test_vlseg2e16ff_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_tumu(vm, vd, 
rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf4x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16mf2x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m1x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m2x2_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_v_bf16m4x2_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg3e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg3e16.c new file mode 100644 index 000000000..524c2e04d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg3e16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature 
+experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_tu(vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_tu(vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_tu(vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_tu(vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, size_t 
vl) { + return __riscv_vlseg3e16_v_bf16m1x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf4x3_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16mf2x3_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m1x3_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_v_bf16m2x3_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg3e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg3e16ff.c new file mode 100644 index 000000000..4d7259c39 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg3e16ff.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return 
__riscv_vlseg3e16ff_v_bf16mf2x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, 
size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf4x3_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16mf2x3_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m1x3_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_v_bf16m2x3_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg4e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg4e16.c new file mode 100644 index 000000000..911393203 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg4e16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_tu(vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_tu(vd, rs1, vl); +} + +vbfloat16m1x4_t 
test_vlseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_tu(vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_tu(vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf4x4_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_mu(vbool32_t vm, 
vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16mf2x4_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m1x4_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_v_bf16m2x4_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg4e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg4e16ff.c new file mode 100644 index 000000000..9be138700 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg4e16ff.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t 
vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf4x4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16mf2x4_mu(vm, vd, rs1, new_vl, vl); 
+} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m1x4_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_v_bf16m2x4_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg5e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg5e16.c new file mode 100644 index 000000000..1733dfada --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg5e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_tu(vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_tu(vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_tu(vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t 
test_vlseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf4x5_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16mf2x5_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_v_bf16m1x5_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg5e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg5e16ff.c new file mode 100644 index 000000000..20ac32642 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg5e16ff.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t 
test_vlseg5e16ff_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf4x5_mu(vm, vd, rs1, 
new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16mf2x5_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_v_bf16m1x5_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg6e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg6e16.c new file mode 100644 index 000000000..81aee1fb4 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg6e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_tu(vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_tu(vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_tu(vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t 
test_vlseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf4x6_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16mf2x6_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_v_bf16m1x6_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg6e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg6e16ff.c new file mode 100644 index 000000000..2f518e669 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg6e16ff.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t 
test_vlseg6e16ff_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf4x6_mu(vm, vd, rs1, 
new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16mf2x6_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_v_bf16m1x6_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg7e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg7e16.c new file mode 100644 index 000000000..de7cc556d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg7e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_tu(vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_tu(vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_tu(vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t 
test_vlseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf4x7_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16mf2x7_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_v_bf16m1x7_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg7e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg7e16ff.c new file mode 100644 index 000000000..01d911246 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg7e16ff.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t 
test_vlseg7e16ff_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf4x7_mu(vm, vd, rs1, 
new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16mf2x7_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_v_bf16m1x7_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg8e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg8e16.c new file mode 100644 index 000000000..65cc25d2f --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg8e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_tu(vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_tu(vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_tu(vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t 
test_vlseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf4x8_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16mf2x8_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_v_bf16m1x8_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg8e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg8e16ff.c new file mode 100644 index 000000000..5131aa333 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlseg8e16ff.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t 
test_vlseg8e16ff_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf4x8_mu(vm, vd, rs1, 
new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16mf2x8_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_v_bf16m1x8_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg2e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg2e16.c new file mode 100644 index 000000000..945a45ea8 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg2e16.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg2e16_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, 
size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg3e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg3e16.c new file mode 100644 index 000000000..bc7156ecf --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg3e16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t 
test_vlsseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t 
test_vlsseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg4e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg4e16.c new file mode 100644 index 000000000..58d9f21a0 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg4e16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg4e16_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg4e16_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg5e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg5e16.c new file mode 100644 index 000000000..89f3d4ee5 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg5e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const 
__bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg6e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg6e16.c new file mode 100644 index 000000000..923639412 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg6e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg6e16_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg7e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg7e16.c new file mode 100644 index 000000000..cfdf51e07 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg7e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t 
test_vlsseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t 
test_vlsseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg8e16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg8e16.c new file mode 100644 index 000000000..37c8ad0f3 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vlsseg8e16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg8e16_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxei16.c new file mode 100644 index 000000000..ee779135d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxei16.c @@ -0,0 +1,105 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t 
test_vluxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t 
test_vluxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, 
vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg2ei16.c new file mode 100644 index 000000000..ae282f833 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg2ei16.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const 
__bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg3ei16.c new file mode 100644 index 000000000..92d8c8e6e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg3ei16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t 
test_vluxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg4ei16.c new file mode 100644 index 000000000..f5cb0229f --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg4ei16.c @@ -0,0 +1,73 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t 
test_vluxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg5ei16.c new file mode 100644 index 000000000..8b30d05fb --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg5ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t 
test_vluxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf4x5_mu(vm, 
vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg6ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg6ei16.c new file mode 100644 index 000000000..c882cb570 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg6ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t 
vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg7ei16.c new file mode 100644 index 000000000..dd2a0c5bd --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg7ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature 
+zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, 
const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg8ei16.c new file mode 100644 index 000000000..d7c2f914b --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-api-tests/vluxseg8ei16.c @@ -0,0 +1,57 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_tu(vd, rs1, 
rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfncvtbf16.c 
b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfncvtbf16.c new file mode 100644 index 000000000..57b08ec56 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfncvtbf16.c @@ -0,0 +1,235 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + 
vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t 
test_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tum(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, 
vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfwcvtbf16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfwcvtbf16.c new file mode 100644 index 000000000..f3cf547d3 --- /dev/null +++ 
b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfwcvtbf16.c @@ -0,0 +1,109 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + 
vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfwmaccbf16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfwmaccbf16.c new file mode 100644 index 000000000..7e97888b8 --- /dev/null +++ 
b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vfwmaccbf16.c @@ -0,0 +1,473 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} 
+ +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return 
__riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vv_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return 
__riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vf_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, + size_t vl) { + 
return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t 
test_vfwmaccbf16_vv_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return 
__riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vle16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vle16.c new file mode 100644 index 000000000..b4d522202 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vle16.c @@ -0,0 +1,129 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t 
vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + 
const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vle16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vle16ff.c new file mode 100644 index 000000000..1606b1fbc --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vle16ff.c @@ -0,0 +1,147 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t 
*new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tumu(vbool4_t vm, 
vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxei16.c new file mode 100644 index 000000000..030ec50fe --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxei16.c @@ -0,0 +1,147 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma 
-disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { 
+ return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + 
const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg2ei16.c new file mode 100644 index 000000000..bfd06a7c9 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg2ei16.c @@ -0,0 +1,146 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const 
__bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tumu(vbool4_t 
vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg3ei16.c new file mode 100644 index 000000000..bc2236ffe --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg3ei16.c @@ -0,0 +1,120 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return 
__riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return 
__riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg4ei16.c new file mode 100644 index 000000000..bf63f5b4d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg4ei16.c @@ -0,0 +1,120 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t 
test_vloxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t 
test_vloxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg5ei16.c new file mode 100644 index 000000000..e96465773 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg5ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t 
rs2, size_t vl) { + return __riscv_vloxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 
*rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg6ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg6ei16.c new file mode 100644 index 000000000..9e6ba4c51 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg6ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t 
test_vloxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg7ei16.c new file mode 100644 index 000000000..c928c645d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg7ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t 
vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t 
vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg8ei16.c new file mode 100644 index 000000000..9d84571b0 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vloxseg8ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlse16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlse16.c new file mode 100644 index 000000000..e6ea668b7 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlse16.c @@ -0,0 +1,147 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma 
-disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16m8_t test_vlse16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t 
test_vlse16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg2e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg2e16.c new file mode 100644 index 000000000..b2dcd6244 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg2e16.c @@ -0,0 +1,115 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t 
test_vlseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return 
__riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg2e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg2e16ff.c new file mode 100644 index 000000000..10ff7fa72 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg2e16ff.c @@ -0,0 +1,139 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + 
+vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, 
vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg3e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg3e16.c new file mode 100644 index 000000000..99aafb0ea --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg3e16.c @@ -0,0 +1,95 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return 
__riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, 
size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg3e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg3e16ff.c new file mode 100644 index 000000000..f60dc61d2 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg3e16ff.c @@ -0,0 +1,114 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, 
size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg4e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg4e16.c new file mode 100644 index 000000000..6044ae82c --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg4e16.c @@ -0,0 +1,95 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tum(vbool16_t vm, 
vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg4e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg4e16ff.c new file mode 100644 index 000000000..d84438041 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg4e16ff.c @@ -0,0 +1,114 @@ +// REQUIRES: 
riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tumu(vbool64_t vm, + 
vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg5e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg5e16.c new file mode 100644 index 000000000..e45b62947 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg5e16.c @@ -0,0 +1,75 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature 
+experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tu(vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + 
vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg5e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg5e16ff.c new file mode 100644 index 000000000..f2e2a29e8 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg5e16ff.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t 
test_vlseg5e16ff_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg6e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg6e16.c new file mode 100644 index 000000000..5a8553113 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg6e16.c @@ -0,0 +1,75 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// 
RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tu(vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t 
test_vlseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg6e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg6e16ff.c new file mode 100644 index 000000000..9cc747362 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg6e16ff.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tum(vm, vd, 
rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg7e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg7e16.c new file mode 100644 index 000000000..597884116 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg7e16.c @@ -0,0 +1,75 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t 
test_vlseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tu(vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_mu(vm, vd, rs1, vl); +} diff --git 
a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg7e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg7e16ff.c new file mode 100644 index 000000000..5fe5b9d44 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg7e16ff.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) 
{ + return __riscv_vlseg7e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg8e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg8e16.c new file mode 100644 index 000000000..dfbe9f89e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg8e16.c @@ -0,0 +1,75 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x8_t 
test_vlseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tu(vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg8e16ff.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg8e16ff.c new 
file mode 100644 index 000000000..fa0322aae --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlseg8e16ff.c @@ -0,0 +1,89 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t 
vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg2e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg2e16.c new file mode 100644 index 000000000..976c21a17 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg2e16.c @@ -0,0 +1,136 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + 
+vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + 
ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg3e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg3e16.c new file mode 100644 index 000000000..ed857c46d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg3e16.c @@ -0,0 +1,112 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature 
+experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg4e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg4e16.c new file mode 100644 index 000000000..311d4477d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg4e16.c @@ -0,0 +1,112 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t 
vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, 
vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg5e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg5e16.c new file mode 100644 index 000000000..a47eb41a5 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg5e16.c @@ -0,0 +1,88 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg5e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg6e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg6e16.c new file mode 100644 index 000000000..df9e43fd1 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg6e16.c @@ -0,0 +1,88 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tumu(vm, vd, 
rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg7e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg7e16.c new file mode 100644 index 000000000..e0dee0e84 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg7e16.c @@ -0,0 +1,88 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, 
size_t vl) { + return __riscv_vlsseg7e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_mu(vm, vd, rs1, rs2, 
vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg8e16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg8e16.c new file mode 100644 index 000000000..c41b7d405 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vlsseg8e16.c @@ -0,0 +1,88 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg8e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxei16.c new file mode 100644 index 000000000..154bb4c04 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxei16.c @@ -0,0 +1,147 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + 
vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return 
__riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, 
vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg2ei16.c new file mode 100644 index 000000000..ec299a4a7 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg2ei16.c @@ -0,0 +1,146 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + 
vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t 
test_vluxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg3ei16.c new file mode 100644 index 000000000..8639b8a86 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg3ei16.c @@ -0,0 +1,120 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return 
__riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, 
size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg4ei16.c new file mode 100644 index 000000000..be038080b --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg4ei16.c @@ -0,0 +1,120 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t 
test_vluxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); 
+} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg5ei16.c new file mode 100644 index 000000000..86786793d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg5ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, 
+ size_t vl) { + return __riscv_vluxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg6ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg6ei16.c new file mode 100644 index 000000000..252b00479 --- /dev/null +++ 
b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg6ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + 
size_t vl) { + return __riscv_vluxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg7ei16.c new file mode 100644 index 000000000..485d088be --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg7ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t 
test_vluxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_mu(vm, vd, rs1, rs2, vl); 
+} diff --git a/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg8ei16.c new file mode 100644 index 000000000..b0e17955e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/llvm-overloaded-tests/vluxseg8ei16.c @@ -0,0 +1,94 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + 
vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfncvtbf16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfncvtbf16.c new file mode 100644 index 000000000..9e3542923 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfncvtbf16.c @@ -0,0 +1,228 @@ +#include +#include + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, 
size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t 
test_vfncvtbf16_f_f_w_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tu(vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tu(vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tu(vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tu(vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tu(vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tu(vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tum(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tum(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + 
size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tum(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_tum(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_tumu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_tumu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_tumu(vbool16_t vm, + vbfloat16m1_t vd, + vfloat32m2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_tumu(vbool8_t vm, + vbfloat16m2_t vd, + vfloat32m4_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_tumu(vbool4_t vm, + vbfloat16m4_t vd, + vfloat32m8_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_tumu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_mu(vbool64_t vm, + vbfloat16mf4_t vd, + vfloat32mf2_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_mu(vbool32_t vm, + vbfloat16mf2_t vd, + vfloat32m1_t vs2, + size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, 
vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} + +vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl) { + return __riscv_vfncvtbf16_f_mu(vm, vd, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfwcvtbf16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfwcvtbf16.c new file mode 100644 index 000000000..dbf0a4d7d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfwcvtbf16.c @@ -0,0 +1,102 @@ +#include +#include + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwcvtbf16_f_tu(vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return 
__riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tum(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_tumu(vm, vd, vs2, vl); +} + +vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t 
vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} + +vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwcvtbf16_f_mu(vm, vd, vs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfwmaccbf16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfwmaccbf16.c new file mode 100644 index 000000000..c20b7c37d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vfwmaccbf16.c @@ -0,0 +1,466 @@ +#include +#include + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tu(vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, 
+ vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t 
test_vfwmaccbf16_vf_f32m8_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + 
size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tu(vfloat32mf2_t 
vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tu(vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tu(vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, 
__RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tum(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tum(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tum(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tum(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tum(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + 
vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_tumu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_tumu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_tumu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_tumu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_tumu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + 
+vfloat32mf2_t test_vfwmaccbf16_vv_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32mf2_t test_vfwmaccbf16_vf_f32mf2_rm_mu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vv_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m1_t test_vfwmaccbf16_vf_f32m1_rm_mu(vbool32_t vm, vfloat32m1_t vd, + __bf16 vs1, vbfloat16mf2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vv_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m2_t test_vfwmaccbf16_vf_f32m2_rm_mu(vbool16_t vm, vfloat32m2_t vd, + __bf16 vs1, vbfloat16m1_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vv_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m4_t test_vfwmaccbf16_vf_f32m4_rm_mu(vbool8_t vm, vfloat32m4_t vd, + __bf16 vs1, vbfloat16m2_t vs2, + size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vv_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl) { + return __riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} + +vfloat32m8_t test_vfwmaccbf16_vf_f32m8_rm_mu(vbool4_t vm, vfloat32m8_t vd, + __bf16 vs1, vbfloat16m4_t vs2, + size_t vl) { + return 
__riscv_vfwmaccbf16_mu(vm, vd, vs1, vs2, __RISCV_FRM_RNE, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vle16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vle16.c new file mode 100644 index 000000000..c62108cd5 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vle16.c @@ -0,0 +1,122 @@ +#include +#include + +vbfloat16mf4_t test_vle16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t vl) { + return __riscv_vle16_tu(vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + 
+vbfloat16m8_t test_vle16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4_t test_vle16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2_t test_vle16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1_t test_vle16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2_t test_vle16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m4_t test_vle16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vle16_mu(vm, vd, rs1, vl); +} + +vbfloat16m8_t test_vle16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl) { + return 
__riscv_vle16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vle16ff.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vle16ff.c new file mode 100644 index 000000000..8311d7fa1 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vle16ff.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vle16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const 
__bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t test_vle16ff_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4_t test_vle16ff_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2_t test_vle16ff_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1_t 
test_vle16ff_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2_t test_vle16ff_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4_t test_vle16ff_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m8_t test_vle16ff_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl) { + return __riscv_vle16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxei16.c new file mode 100644 index 000000000..053e6dd94 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxei16.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return 
__riscv_vloxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const 
__bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vloxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vloxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vloxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vloxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vloxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vloxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vloxei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg2ei16.c new file mode 100644 index 000000000..cebce8595 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg2ei16.c @@ -0,0 +1,139 @@ +#include +#include + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + 
const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t 
rs2, + size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vloxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vloxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vloxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vloxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vloxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg3ei16.c new file mode 100644 index 000000000..7dc1de409 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg3ei16.c @@ 
-0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vloxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vloxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vloxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vloxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg4ei16.c new file mode 100644 index 000000000..a8db59018 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg4ei16.c @@ -0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const 
__bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vloxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + 
vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vloxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vloxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vloxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg5ei16.c new file mode 100644 index 000000000..28cb437cb --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg5ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return 
__riscv_vloxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vloxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vloxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vloxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg6ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg6ei16.c new file mode 100644 index 000000000..9745d16e8 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg6ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tu(vd, rs1, rs2, 
vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vloxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vloxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_mu(vm, vd, rs1, 
rs2, vl); +} + +vbfloat16m1x6_t test_vloxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg7ei16.c new file mode 100644 index 000000000..6b64fef2c --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg7ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + 
vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vloxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vloxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vloxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg8ei16.c new file mode 100644 index 000000000..e5b6607d2 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vloxseg8ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return 
__riscv_vloxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vloxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vloxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vloxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlse16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlse16.c new file mode 100644 index 000000000..f31b6dae1 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlse16.c @@ -0,0 +1,140 @@ 
+#include +#include + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlse16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return 
__riscv_vlse16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vlse16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vlse16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vlse16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vlse16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vlse16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return 
__riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vlse16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlse16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg2e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg2e16.c new file mode 100644 index 000000000..adf0bcfd7 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg2e16.c @@ -0,0 +1,108 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + 
+vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x2_t test_vlseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x2_t test_vlseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m4x2_t test_vlseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg2e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg2e16ff.c 
b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg2e16ff.c new file mode 100644 index 000000000..94daad69a --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg2e16ff.c @@ -0,0 +1,132 @@ +#include +#include + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t 
test_vlseg2e16ff_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x2_t test_vlseg2e16ff_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x2_t test_vlseg2e16ff_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x2_t test_vlseg2e16ff_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x2_t test_vlseg2e16ff_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} + 
+vbfloat16m4x2_t test_vlseg2e16ff_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg2e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg3e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg3e16.c new file mode 100644 index 000000000..cf0d583ff --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg3e16.c @@ -0,0 +1,88 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + 
+vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x3_t test_vlseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x3_t test_vlseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg3e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg3e16ff.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg3e16ff.c new file mode 100644 index 000000000..24a610a5d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg3e16ff.c @@ -0,0 +1,107 @@ +#include +#include + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t 
*new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x3_t test_vlseg3e16ff_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + 
size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x3_t test_vlseg3e16ff_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x3_t test_vlseg3e16ff_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x3_t test_vlseg3e16ff_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg3e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg4e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg4e16.c new file mode 100644 index 000000000..a0311857a --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg4e16.c @@ -0,0 +1,88 @@ +#include +#include + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t 
test_vlseg4e16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x4_t test_vlseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m2x4_t test_vlseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg4e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg4e16ff.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg4e16ff.c new file mode 100644 index 000000000..cc7cd3e8f --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg4e16ff.c @@ 
-0,0 +1,107 @@ +#include +#include + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t 
test_vlseg4e16ff_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x4_t test_vlseg4e16ff_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x4_t test_vlseg4e16ff_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x4_t test_vlseg4e16ff_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m2x4_t test_vlseg4e16ff_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg4e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg5e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg5e16.c new file mode 100644 index 000000000..07e8b5d4e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg5e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tu(vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x5_t 
test_vlseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x5_t test_vlseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg5e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg5e16ff.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg5e16ff.c new file mode 100644 index 000000000..e13f2ef80 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg5e16ff.c @@ -0,0 +1,82 @@ +#include +#include + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + 
return __riscv_vlseg5e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x5_t test_vlseg5e16ff_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x5_t test_vlseg5e16ff_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg5e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x5_t test_vlseg5e16ff_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg5e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg6e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg6e16.c new file mode 100644 index 000000000..58af0751a --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg6e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tu(vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const 
__bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x6_t test_vlseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg6e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg6e16ff.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg6e16ff.c new file mode 100644 index 000000000..b27f2357d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg6e16ff.c @@ -0,0 +1,82 @@ +#include +#include + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t 
*new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x6_t test_vlseg6e16ff_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x6_t test_vlseg6e16ff_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x6_t test_vlseg6e16ff_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg6e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg7e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg7e16.c new file mode 100644 index 000000000..4bfca3c35 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg7e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tu(vd, rs1, vl); +} + +vbfloat16m1x7_t 
test_vlseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x7_t test_vlseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg7e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg7e16ff.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg7e16ff.c new file mode 100644 index 000000000..af9b65e7e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg7e16ff.c @@ -0,0 +1,82 
@@ +#include +#include + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x7_t test_vlseg7e16ff_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_mu(vm, vd, rs1, new_vl, vl); +} + 
+vbfloat16mf2x7_t test_vlseg7e16ff_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x7_t test_vlseg7e16ff_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg7e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg8e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg8e16.c new file mode 100644 index 000000000..653938350 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg8e16.c @@ -0,0 +1,68 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tu(vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tu(vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tu(vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tum(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tum(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) 
{ + return __riscv_vlseg8e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_tumu(vm, vd, rs1, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_mu(vm, vd, rs1, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_mu(vm, vd, rs1, vl); +} + +vbfloat16m1x8_t test_vlseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl) { + return __riscv_vlseg8e16_mu(vm, vd, rs1, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg8e16ff.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg8e16ff.c new file mode 100644 index 000000000..a4c013385 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlseg8e16ff.c @@ -0,0 +1,82 @@ +#include +#include + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tu(vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return 
__riscv_vlseg8e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tum(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_tumu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf4x8_t test_vlseg8e16ff_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16mf2x8_t test_vlseg8e16ff_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_mu(vm, vd, rs1, new_vl, vl); +} + +vbfloat16m1x8_t test_vlseg8e16ff_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, + size_t *new_vl, size_t vl) { + return __riscv_vlseg8e16ff_mu(vm, vd, rs1, new_vl, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg2e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg2e16.c new file mode 100644 index 000000000..8fb0cd0fb --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg2e16.c @@ -0,0 +1,129 @@ +#include +#include + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + 
+vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, 
size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vlsseg2e16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vlsseg2e16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vlsseg2e16_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vlsseg2e16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vlsseg2e16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg2e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg3e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg3e16.c new file mode 100644 index 000000000..cd0bf487b --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg3e16.c @@ -0,0 +1,105 @@ +#include +#include + +vbfloat16mf4x3_t 
test_vlsseg3e16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + 
return __riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vlsseg3e16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vlsseg3e16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vlsseg3e16_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vlsseg3e16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg3e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg4e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg4e16.c new file mode 100644 index 000000000..533804a3f --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg4e16.c @@ -0,0 +1,105 @@ +#include +#include + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + 
size_t vl) { + return __riscv_vlsseg4e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vlsseg4e16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vlsseg4e16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return 
__riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vlsseg4e16_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vlsseg4e16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg4e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg5e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg5e16.c new file mode 100644 index 000000000..677e6f2ec --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg5e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + 
ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vlsseg5e16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vlsseg5e16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg5e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vlsseg5e16_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg5e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg6e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg6e16.c new file mode 100644 index 000000000..bdae126e0 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg6e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tum(vbool64_t vm, + 
vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vlsseg6e16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vlsseg6e16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg6e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vlsseg6e16_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg6e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg7e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg7e16.c new file mode 100644 index 000000000..efd8b3a9d --- /dev/null +++ 
b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg7e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vlsseg7e16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_mu(vm, 
vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vlsseg7e16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg7e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vlsseg7e16_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg7e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg8e16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg8e16.c new file mode 100644 index 000000000..97fd79283 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vlsseg8e16.c @@ -0,0 +1,81 @@ +#include +#include + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t 
vl) { + return __riscv_vlsseg8e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vlsseg8e16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vlsseg8e16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + ptrdiff_t rs2, size_t vl) { + return __riscv_vlsseg8e16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vlsseg8e16_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl) { + return __riscv_vlsseg8e16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxei16.c new file mode 100644 index 000000000..226dec981 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxei16.c @@ -0,0 +1,140 @@ +#include +#include + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return 
__riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl) { + return __riscv_vluxei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + 
return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4_t test_vluxei16_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2_t test_vluxei16_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1_t test_vluxei16_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2_t test_vluxei16_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4_t test_vluxei16_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m8_t test_vluxei16_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl) { + return __riscv_vluxei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg2ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg2ei16.c new file mode 100644 index 000000000..dec0690d9 --- /dev/null +++ 
b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg2ei16.c @@ -0,0 +1,139 @@ +#include +#include + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return 
__riscv_vluxseg2ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x2_t test_vluxseg2ei16_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x2_t test_vluxseg2ei16_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x2_t test_vluxseg2ei16_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x2_t test_vluxseg2ei16_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m4x2_t test_vluxseg2ei16_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint16m4_t 
rs2, size_t vl) { + return __riscv_vluxseg2ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg3ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg3ei16.c new file mode 100644 index 000000000..127d97bb5 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg3ei16.c @@ -0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t 
test_vluxseg3ei16_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x3_t test_vluxseg3ei16_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x3_t test_vluxseg3ei16_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x3_t test_vluxseg3ei16_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x3_t test_vluxseg3ei16_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg4ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg4ei16.c new file mode 100644 index 000000000..387738336 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg4ei16.c @@ -0,0 +1,113 @@ +#include +#include + +vbfloat16mf4x4_t 
test_vluxseg4ei16_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_tumu(vbool16_t 
vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x4_t test_vluxseg4ei16_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x4_t test_vluxseg4ei16_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x4_t test_vluxseg4ei16_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m2x4_t test_vluxseg4ei16_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint16m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg5ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg5ei16.c new file mode 100644 index 000000000..e44715aab --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg5ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return 
__riscv_vluxseg5ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x5_t test_vluxseg5ei16_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x5_t test_vluxseg5ei16_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x5_t test_vluxseg5ei16_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg6ei16.c 
b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg6ei16.c new file mode 100644 index 000000000..86655a32a --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg6ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return 
__riscv_vluxseg6ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x6_t test_vluxseg6ei16_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x6_t test_vluxseg6ei16_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x6_t test_vluxseg6ei16_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg7ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg7ei16.c new file mode 100644 index 000000000..f0473d13d --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg7ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t 
test_vluxseg7ei16_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x7_t test_vluxseg7ei16_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x7_t test_vluxseg7ei16_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x7_t test_vluxseg7ei16_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg8ei16.c b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg8ei16.c new file mode 100644 index 000000000..07ed8156f --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded-api-testing/vluxseg8ei16.c @@ -0,0 +1,87 @@ +#include +#include + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t 
test_vluxseg8ei16_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tu(vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tum(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_tumu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf4x8_t test_vluxseg8ei16_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + +vbfloat16mf2x8_t test_vluxseg8ei16_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} + 
+vbfloat16m1x8_t test_vluxseg8ei16_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint16m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei16_mu(vm, vd, rs1, rs2, vl); +} diff --git a/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs.adoc b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs.adoc new file mode 100644 index 000000000..266e06b4c --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs.adoc @@ -0,0 +1,2100 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[policy-variant-overloadedbf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2_t __riscv_vle16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1_t __riscv_vle16_tu(vbfloat16m1_t vd, const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_tu(vbfloat16m2_t vd, const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_tu(vbfloat16m4_t vd, const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_tu(vbfloat16m8_t vd, const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); 
+vbfloat16m1_t __riscv_vle16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m4_t __riscv_vle16_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m8_t __riscv_vle16_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, + size_t vl); +---- + +[[policy-variant-overloadedbf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-overloadedvector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlse16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_tu(vbfloat16m1_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_tu(vbfloat16m2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_tu(vbfloat16m4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_tu(vbfloat16m8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t 
vl); +vbfloat16m8_t __riscv_vlse16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +---- + +[[policy-variant-overloadedvector-strided-store]] +==== Vector Strided Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t 
vl); +vbfloat16m2_t __riscv_vluxei16_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + 
const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); 
+vbfloat16mf4_t __riscv_vluxei16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +---- + +[[policy-variant-overloadedvector-indexed-store]] +==== Vector Indexed Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedunit-stride-fault-only-first-loads]] +==== Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16ff_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2_t __riscv_vle16ff_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1_t __riscv_vle16ff_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m2_t 
__riscv_vle16ff_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16mf2_t __riscv_vle16ff_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m1_t __riscv_vle16ff_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +---- + +=== BFloat16 Vector Loads and Stores Segment Intrinsics + +[[policy-variant-overloadedvector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t 
__riscv_vlseg2e16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + 
size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t 
__riscv_vlseg3e16ff_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t 
__riscv_vlseg3e16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + 
const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x5_t 
__riscv_vlseg5e16ff_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); 
+vbfloat16mf2x4_t __riscv_vlseg4e16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t 
__riscv_vlseg4e16ff_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const 
__bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const 
__bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); 
+vbfloat16mf4x5_t __riscv_vlseg5e16ff_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t 
*new_vl, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +---- + +[[policy-variant-overloadedvector-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-strided-segment-load]] +==== Vector Strided Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlsseg2e16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_tu(vbfloat16mf2x3_t vd, const
__bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const 
__bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_tum(vbool16_t vm, vbfloat16m1x5_t vd, 
+ const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x3_t 
__riscv_vlsseg3e16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); 
+vbfloat16m4x2_t __riscv_vlsseg2e16_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t 
rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +---- + +[[policy-variant-overloadedvector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-overloadedvector-indexed-segment-load]] +==== Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_tu(vbfloat16m1x4_t vd, 
const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t 
__riscv_vluxseg4ei16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_tum(vbool64_t vm, vbfloat16mf4x3_t 
vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, 
+ size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t 
__riscv_vluxseg2ei16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + 
const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const 
__bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); 
+vbfloat16mf4x5_t __riscv_vluxseg5ei16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t 
__riscv_vluxseg6ei16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_mu(vbool32_t vm, 
vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); 
+vbfloat16m4x2_t __riscv_vloxseg2ei16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_mu(vbool16_t 
vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +---- + +[[policy-variant-overloadedvector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +=== BFloat16 Convert Intrinsics + +[[policy-variant-overloadedbf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f_tu(vbfloat16mf4_t vd, vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tu(vbfloat16mf2_t vd, vfloat32m1_t vs2, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tu(vbfloat16m1_t vd, vfloat32m2_t vs2, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tu(vbfloat16m2_t vd, vfloat32m4_t vs2, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tu(vbfloat16m4_t vd, vfloat32m8_t vs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tum(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tum(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tum(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tumu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tumu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tumu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tumu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tumu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_mu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_mu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_mu(vbool16_t vm, vbfloat16m1_t vd, + 
vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f_tu(vbfloat16mf4_t vd, vfloat32mf2_t vs2, + unsigned int frm, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tu(vbfloat16mf2_t vd, vfloat32m1_t vs2, + unsigned int frm, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tu(vbfloat16m1_t vd, vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tu(vbfloat16m2_t vd, vfloat32m4_t vs2, + unsigned int frm, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tu(vbfloat16m4_t vd, vfloat32m8_t vs2, + unsigned int frm, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tum(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tum(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tum(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tumu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tumu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tumu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tumu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tumu(vbool4_t vm, 
vbfloat16m4_t vd, + vfloat32m8_t vs2, unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_mu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_mu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, unsigned int frm, + size_t vl); +---- + +[[policy-variant-overloadedbf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwcvtbf16_f_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_tu(vfloat32m1_t vd, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_tu(vfloat32m2_t vd, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_tu(vfloat32m4_t vd, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_tu(vfloat32m8_t vd, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_tumu(vbool32_t vm, 
vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +---- + +=== BFloat16 Arithmetic Intrinsics + +[[policy-variant-overloadedbf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, 
size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m2_t 
__riscv_vfwmaccbf16_tumu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, 
unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, 
size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tumu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); 
+vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +---- + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[policy-variant-overloadedreinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-lmul-extensionn]] +==== Vector LMUL Extension Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-initialization]] +==== Vector Initialization Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-insertion]] +==== Vector Insertion Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-overloadedvector-extraction]] +==== Vector Extraction Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-creation]] +==== Vector Creation Intrinsics +Intrinsics here don't have a policy variant. diff --git a/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc new file mode 100644 index 000000000..17fec1b34 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/00_bfloat16_vector_loads_and_stores_intrinsics.adoc @@ -0,0 +1,334 @@ + +=== BFloat16 Vector Loads and Stores Intrinsics + +[[policy-variant-overloadedbf16-vector-unit-stride-load]] +==== Vector Unit-Stride Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2_t __riscv_vle16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1_t __riscv_vle16_tu(vbfloat16m1_t vd, const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_tu(vbfloat16m2_t vd, const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_tu(vbfloat16m4_t vd, const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_tu(vbfloat16m8_t vd, const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// masked functions 
+vbfloat16mf4_t __riscv_vle16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4_t __riscv_vle16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m8_t __riscv_vle16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2_t __riscv_vle16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1_t __riscv_vle16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2_t __riscv_vle16_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m4_t __riscv_vle16_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m8_t __riscv_vle16_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, + size_t vl); +---- + +[[policy-variant-overloadedbf16-vector-unit-stride-store]] +==== Vector Unit-Stride Store Intrinsics +Intrinsics here don't have a policy variant. 
+ +[[policy-variant-overloadedvector-strided-load]] +==== Vector Strided Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vlse16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_tu(vbfloat16m1_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_tu(vbfloat16m2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_tu(vbfloat16m4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_tu(vbfloat16m8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t 
vl); +vbfloat16m8_t __riscv_vlse16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vlse16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vlse16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m1_t __riscv_vlse16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m2_t __riscv_vlse16_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m4_t __riscv_vlse16_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +vbfloat16m8_t __riscv_vlse16_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, size_t vl); +---- + +[[policy-variant-overloadedvector-strided-store]] +==== Vector Strided Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-indexed-load]] +==== Vector Indexed Load Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vloxei16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vloxei16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vloxei16_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2_t __riscv_vloxei16_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vloxei16_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vloxei16_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +vbfloat16mf4_t __riscv_vluxei16_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2_t __riscv_vluxei16_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1_t __riscv_vluxei16_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t 
vl); +vbfloat16m2_t __riscv_vluxei16_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4_t __riscv_vluxei16_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16m8_t __riscv_vluxei16_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint16m8_t rs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + 
const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +vbfloat16mf4_t __riscv_vluxei16_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vloxei16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vloxei16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vloxei16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vloxei16_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vloxei16_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vloxei16_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); 
+vbfloat16mf4_t __riscv_vluxei16_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2_t __riscv_vluxei16_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1_t __riscv_vluxei16_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2_t __riscv_vluxei16_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4_t __riscv_vluxei16_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16m8_t __riscv_vluxei16_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint16m8_t rs2, + size_t vl); +---- + +[[policy-variant-overloadedvector-indexed-store]] +==== Vector Indexed Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedunit-stride-fault-only-first-loads]] +==== Unit-stride Fault-Only-First Loads Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vle16ff_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2_t __riscv_vle16ff_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1_t __riscv_vle16ff_tu(vbfloat16m1_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_tu(vbfloat16m2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_tu(vbfloat16m4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_tu(vbfloat16m8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m2_t 
__riscv_vle16ff_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2_t __riscv_vle16ff_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1_t __riscv_vle16ff_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2_t __riscv_vle16ff_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4_t __riscv_vle16ff_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m8_t __riscv_vle16ff_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vle16ff_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16mf2_t __riscv_vle16ff_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m1_t __riscv_vle16ff_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m2_t __riscv_vle16ff_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m4_t __riscv_vle16ff_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +vbfloat16m8_t __riscv_vle16ff_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, size_t *new_vl, size_t vl); +---- diff --git a/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc 
b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc new file mode 100644 index 000000000..507b4155e --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/01_bfloat16_vector_loads_and_stores_segment_intrinsics.adoc @@ -0,0 +1,1344 @@ + +=== BFloat16 Vector Loads and Stores Segment Intrinsics + +[[policy-variant-overloadedvector-unit-stride-segment-load]] +==== Vector Unit-Stride Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlseg2e16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + 
size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_tu(vbfloat16mf2x5_t 
vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + size_t *new_vl, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_tum(vbool64_t vm, 
vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t 
__riscv_vlseg3e16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t 
__riscv_vlseg8e16ff_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_tumu(vbool64_t 
vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); 
+vbfloat16m2x4_t __riscv_vlseg4e16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t 
*new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlseg2e16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x7_t 
__riscv_vlseg7e16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t vl); 
+vbfloat16m4x2_t __riscv_vlseg2e16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t vl); +vbfloat16mf4x2_t __riscv_vlseg2e16ff_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x3_t __riscv_vlseg3e16ff_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x4_t __riscv_vlseg4e16ff_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x5_t __riscv_vlseg5e16ff_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x6_t __riscv_vlseg6e16ff_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x7_t __riscv_vlseg7e16ff_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf4x8_t __riscv_vlseg8e16ff_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x2_t __riscv_vlseg2e16ff_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x3_t __riscv_vlseg3e16ff_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x4_t __riscv_vlseg4e16ff_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x5_t __riscv_vlseg5e16ff_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x6_t __riscv_vlseg6e16ff_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x7_t __riscv_vlseg7e16ff_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16mf2x8_t __riscv_vlseg8e16ff_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x2_t __riscv_vlseg2e16ff_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, size_t *new_vl, + 
size_t vl); +vbfloat16m1x3_t __riscv_vlseg3e16ff_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x4_t __riscv_vlseg4e16ff_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x5_t __riscv_vlseg5e16ff_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x6_t __riscv_vlseg6e16ff_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x7_t __riscv_vlseg7e16ff_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m1x8_t __riscv_vlseg8e16ff_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x2_t __riscv_vlseg2e16ff_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x3_t __riscv_vlseg3e16ff_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m2x4_t __riscv_vlseg4e16ff_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +vbfloat16m4x2_t __riscv_vlseg2e16ff_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, size_t *new_vl, + size_t vl); +---- + +[[policy-variant-overloadedvector-unit-stride-segment-store]] +==== Vector Unit-Stride Segment Store Intrinsics +Intrinsics here don't have a policy variant.
+ +[[policy-variant-overloadedvector-strided-segment-load]] +==== Vector Strided Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vlsseg2e16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x5_t 
__riscv_vlsseg5e16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + ptrdiff_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); 
+vbfloat16mf2x4_t __riscv_vlsseg4e16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// 
masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + 
const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vlsseg2e16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vlsseg3e16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vlsseg4e16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vlsseg5e16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vlsseg6e16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vlsseg7e16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vlsseg8e16_mu(vbool64_t vm, 
vbfloat16mf4x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vlsseg2e16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vlsseg3e16_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vlsseg4e16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vlsseg5e16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vlsseg6e16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vlsseg7e16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vlsseg8e16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vlsseg2e16_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vlsseg3e16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vlsseg4e16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vlsseg5e16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vlsseg6e16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vlsseg7e16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vlsseg8e16_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vlsseg2e16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vlsseg3e16_mu(vbool8_t vm, vbfloat16m2x3_t 
vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vlsseg4e16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vlsseg2e16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, ptrdiff_t rs2, + size_t vl); +---- + +[[policy-variant-overloadedvector-strided-segment-store]] +==== Vector Strided Segment Store Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-indexed-segment-load]] +==== Vector Indexed Segment Load Intrinsics + +[,c] +---- +vbfloat16mf4x2_t __riscv_vloxseg2ei16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t 
__riscv_vloxseg7ei16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t 
vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, + vuint16mf4_t rs2, size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, + vuint16mf2_t rs2, size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, + vuint16m1_t rs2, size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, 
+ vuint16m2_t rs2, size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, + vuint16m2_t rs2, size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, + vuint16m4_t rs2, size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t 
__riscv_vloxseg8ei16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_tum(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_tum(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_tum(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_tum(vbool64_t vm, vbfloat16mf4x5_t vd, + const 
__bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_tum(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_tum(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_tum(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_tum(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_tum(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_tum(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_tum(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_tum(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_tum(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_tum(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_tum(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_tum(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_tum(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_tum(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_tum(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); 
+vbfloat16m1x7_t __riscv_vluxseg7ei16_tum(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_tum(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t 
__riscv_vloxseg4ei16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_tumu(vbool4_t vm, 
vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_tumu(vbool16_t vm, vbfloat16m1x2_t vd, + 
const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_tumu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_tumu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_tumu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_tumu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_tumu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_tumu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_tumu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_tumu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_tumu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_tumu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +// masked functions +vbfloat16mf4x2_t __riscv_vloxseg2ei16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vloxseg3ei16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vloxseg4ei16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vloxseg5ei16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vloxseg6ei16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + 
size_t vl); +vbfloat16mf4x7_t __riscv_vloxseg7ei16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vloxseg8ei16_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vloxseg2ei16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vloxseg3ei16_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vloxseg4ei16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vloxseg5ei16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x6_t __riscv_vloxseg6ei16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vloxseg7ei16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vloxseg8ei16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vloxseg2ei16_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vloxseg3ei16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vloxseg4ei16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vloxseg5ei16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vloxseg6ei16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vloxseg7ei16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vloxseg8ei16_mu(vbool16_t vm, 
vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vloxseg2ei16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vloxseg3ei16_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vloxseg4ei16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vloxseg2ei16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +vbfloat16mf4x2_t __riscv_vluxseg2ei16_mu(vbool64_t vm, vbfloat16mf4x2_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x3_t __riscv_vluxseg3ei16_mu(vbool64_t vm, vbfloat16mf4x3_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x4_t __riscv_vluxseg4ei16_mu(vbool64_t vm, vbfloat16mf4x4_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x5_t __riscv_vluxseg5ei16_mu(vbool64_t vm, vbfloat16mf4x5_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x6_t __riscv_vluxseg6ei16_mu(vbool64_t vm, vbfloat16mf4x6_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x7_t __riscv_vluxseg7ei16_mu(vbool64_t vm, vbfloat16mf4x7_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf4x8_t __riscv_vluxseg8ei16_mu(vbool64_t vm, vbfloat16mf4x8_t vd, + const __bf16 *rs1, vuint16mf4_t rs2, + size_t vl); +vbfloat16mf2x2_t __riscv_vluxseg2ei16_mu(vbool32_t vm, vbfloat16mf2x2_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x3_t __riscv_vluxseg3ei16_mu(vbool32_t vm, vbfloat16mf2x3_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x4_t __riscv_vluxseg4ei16_mu(vbool32_t vm, vbfloat16mf2x4_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x5_t __riscv_vluxseg5ei16_mu(vbool32_t vm, vbfloat16mf2x5_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + 
size_t vl); +vbfloat16mf2x6_t __riscv_vluxseg6ei16_mu(vbool32_t vm, vbfloat16mf2x6_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x7_t __riscv_vluxseg7ei16_mu(vbool32_t vm, vbfloat16mf2x7_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16mf2x8_t __riscv_vluxseg8ei16_mu(vbool32_t vm, vbfloat16mf2x8_t vd, + const __bf16 *rs1, vuint16mf2_t rs2, + size_t vl); +vbfloat16m1x2_t __riscv_vluxseg2ei16_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x3_t __riscv_vluxseg3ei16_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x4_t __riscv_vluxseg4ei16_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x5_t __riscv_vluxseg5ei16_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x6_t __riscv_vluxseg6ei16_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x7_t __riscv_vluxseg7ei16_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m1x8_t __riscv_vluxseg8ei16_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, vuint16m1_t rs2, + size_t vl); +vbfloat16m2x2_t __riscv_vluxseg2ei16_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x3_t __riscv_vluxseg3ei16_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m2x4_t __riscv_vluxseg4ei16_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, vuint16m2_t rs2, + size_t vl); +vbfloat16m4x2_t __riscv_vluxseg2ei16_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, vuint16m4_t rs2, + size_t vl); +---- + +[[policy-variant-overloadedvector-indexed-segment-store]] +==== Vector Indexed Segment Store Intrinsics +Intrinsics here don't have a policy variant. 
diff --git a/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc new file mode 100644 index 000000000..94b1ff8f3 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/02_bfloat16_convert_intrinsics.adoc @@ -0,0 +1,160 @@ + +=== BFloat16 Convert Intrinsics + +[[policy-variant-overloadedbf16-vector-narrow-convert]] +==== Vector Narrowing Convert Intrinsics + +[,c] +---- +vbfloat16mf4_t __riscv_vfncvtbf16_f_tu(vbfloat16mf4_t vd, vfloat32mf2_t vs2, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tu(vbfloat16mf2_t vd, vfloat32m1_t vs2, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tu(vbfloat16m1_t vd, vfloat32m2_t vs2, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tu(vbfloat16m2_t vd, vfloat32m4_t vs2, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tu(vbfloat16m4_t vd, vfloat32m8_t vs2, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tum(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tum(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tum(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tumu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tumu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tumu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tumu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); 
+vbfloat16m4_t __riscv_vfncvtbf16_f_tumu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_mu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_mu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, size_t vl); +vbfloat16mf4_t __riscv_vfncvtbf16_f_tu(vbfloat16mf4_t vd, vfloat32mf2_t vs2, + unsigned int frm, size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tu(vbfloat16mf2_t vd, vfloat32m1_t vs2, + unsigned int frm, size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tu(vbfloat16m1_t vd, vfloat32m2_t vs2, + unsigned int frm, size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tu(vbfloat16m2_t vd, vfloat32m4_t vs2, + unsigned int frm, size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tu(vbfloat16m4_t vd, vfloat32m8_t vs2, + unsigned int frm, size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tum(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_tum(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tum(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tum(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tum(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_tumu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t 
__riscv_vfncvtbf16_f_tumu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_tumu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_tumu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_tumu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, unsigned int frm, + size_t vl); +// masked functions +vbfloat16mf4_t __riscv_vfncvtbf16_f_mu(vbool64_t vm, vbfloat16mf4_t vd, + vfloat32mf2_t vs2, unsigned int frm, + size_t vl); +vbfloat16mf2_t __riscv_vfncvtbf16_f_mu(vbool32_t vm, vbfloat16mf2_t vd, + vfloat32m1_t vs2, unsigned int frm, + size_t vl); +vbfloat16m1_t __riscv_vfncvtbf16_f_mu(vbool16_t vm, vbfloat16m1_t vd, + vfloat32m2_t vs2, unsigned int frm, + size_t vl); +vbfloat16m2_t __riscv_vfncvtbf16_f_mu(vbool8_t vm, vbfloat16m2_t vd, + vfloat32m4_t vs2, unsigned int frm, + size_t vl); +vbfloat16m4_t __riscv_vfncvtbf16_f_mu(vbool4_t vm, vbfloat16m4_t vd, + vfloat32m8_t vs2, unsigned int frm, + size_t vl); +---- + +[[policy-variant-overloadedbf16-vector-widening-convert]] +==== Vector Widening Convert Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwcvtbf16_f_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_tu(vfloat32m1_t vd, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_tu(vfloat32m2_t vd, vbfloat16m1_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_tu(vfloat32m4_t vd, vbfloat16m2_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_tu(vfloat32m8_t vd, vbfloat16m4_t vs2, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_tum(vbool16_t vm, vfloat32m2_t vd, + 
vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwcvtbf16_f_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwcvtbf16_f_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwcvtbf16_f_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwcvtbf16_f_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwcvtbf16_f_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs2, size_t vl); +---- diff --git a/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc new file mode 100644 index 000000000..64c886112 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/03_bfloat16_arithmetic_intrinsics.adoc @@ -0,0 +1,232 @@ + +=== BFloat16 Arithmetic Intrinsics + +[[policy-variant-overloadedbf16-widening-multiply-accumulate]] +==== Vector Widening Multiply-Accumulate Intrinsics + +[,c] +---- +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, 
size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t 
vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tumu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + size_t vl); 
+vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, vbfloat16mf4_t vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tu(vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, vbfloat16mf2_t vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tu(vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, vbfloat16m1_t vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tu(vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, vbfloat16m2_t vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tu(vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, vbfloat16m4_t vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tu(vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t 
__riscv_vfwmaccbf16_tum(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tum(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tum(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tum(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tum(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_tumu(vbool64_t vm, vfloat32mf2_t vd, + __bf16 vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_tumu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tumu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_tumu(vbool16_t 
vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_tumu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_tumu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, unsigned int frm, + size_t vl); +// masked functions +vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, + vbfloat16mf4_t vs1, vbfloat16mf4_t vs2, + unsigned int frm, size_t vl); +vfloat32mf2_t __riscv_vfwmaccbf16_mu(vbool64_t vm, vfloat32mf2_t vd, __bf16 vs1, + vbfloat16mf4_t vs2, unsigned int frm, + size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, + vbfloat16mf2_t vs1, vbfloat16mf2_t vs2, + unsigned int frm, size_t vl); +vfloat32m1_t __riscv_vfwmaccbf16_mu(vbool32_t vm, vfloat32m1_t vd, __bf16 vs1, + vbfloat16mf2_t vs2, unsigned int frm, + size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, + vbfloat16m1_t vs1, vbfloat16m1_t vs2, + unsigned int frm, size_t vl); +vfloat32m2_t __riscv_vfwmaccbf16_mu(vbool16_t vm, vfloat32m2_t vd, __bf16 vs1, + vbfloat16m1_t vs2, unsigned int frm, + size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, + vbfloat16m2_t vs1, vbfloat16m2_t vs2, + unsigned int frm, size_t vl); +vfloat32m4_t __riscv_vfwmaccbf16_mu(vbool8_t vm, vfloat32m4_t vd, __bf16 vs1, + vbfloat16m2_t vs2, unsigned int frm, + size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, + vbfloat16m4_t vs1, vbfloat16m4_t vs2, + unsigned int frm, size_t vl); +vfloat32m8_t __riscv_vfwmaccbf16_mu(vbool4_t vm, vfloat32m8_t vd, __bf16 vs1, + vbfloat16m4_t vs2, 
unsigned int frm, + size_t vl); +---- diff --git a/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc new file mode 100644 index 000000000..db730fe08 --- /dev/null +++ b/auto-generated/bfloat16/policy_funcs/overloaded_intrinsic_funcs/04_bfloat16_miscellaneous_vector_utility_intrinsics.adoc @@ -0,0 +1,30 @@ + +=== BFloat16 Miscellaneous Vector Utility Intrinsics + +[[policy-variant-overloadedreinterpret-cast-conversion]] +==== Reinterpret Cast Conversion Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-lmul-extensionn]] +==== Vector LMUL Extension Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-lmul-truncation]] +==== Vector LMUL Truncation Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-initialization]] +==== Vector Initialization Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-insertion]] +==== Vector Insertion Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-extraction]] +==== Vector Extraction Intrinsics +Intrinsics here don't have a policy variant. + +[[policy-variant-overloadedvector-creation]] +==== Vector Creation Intrinsics +Intrinsics here don't have a policy variant. diff --git a/doc/header.adoc b/doc/header.adoc index 04dfcaf7e..e6b753f7b 100644 --- a/doc/header.adoc +++ b/doc/header.adoc @@ -46,6 +46,10 @@ may not conform to the future standard. 
include::preface.adoc[] include::rvv-intrinsic-spec.adoc[] +include::vector-bfloat16-spec.adoc[] + +include::references.adoc[] + ifeval::["{build-type}" != "quick"] [appendix] diff --git a/doc/references.adoc b/doc/references.adoc new file mode 100644 index 000000000..d5197e47f --- /dev/null +++ b/doc/references.adoc @@ -0,0 +1,67 @@ +== References + +^0^https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc[Github - riscv/riscv-v-spec/v-spec.adoc] + +NOTE: Standard extensions are merged into `riscv/riscv-isa-manual` after ratification. There is an on-going pull request ^26^ for the "V" extension to be merged. At this moment this intrinsics specification still references the frozen draft ^0^. This reference will be updated in the future once the pull request has been merged. + +^1^https://github.com/riscv-non-isa/riscv-c-api-doc/blob/master/riscv-c-api.md[Github - riscv-non-isa/riscv-c-api-doc/riscv-c-api.md] + +^2^https://llvm.org/docs/RISCVUsage.html[User Guide for RISC-V Target] + +^3^https://gcc.gnu.org/onlinedocs/gcc/RISC-V-Options.html[RISC-V Options (Using the GNU Compiler Collection (GCC))] + +^4^Section 3.4.1 (Vector selected element width `vsew[2:0]`) in the specification ^0^ + +^5^Section 3.4.2 (Vector Register Grouping (`vlmul[2:0]``)) in the specification ^0^ + +^6^Section 3.4.3 (Vector Tail Agnostic and Vector Mask Agnostic `vta` and `vma`) in the specification ^0^ + +^7^Section 5.3 (Vector Masking) in the specification ^0^ + +^8^Section 3.8 (Vector Fixed-Point Rounding Mode Register `vxrm`) in the specification ^0^ + +^9^https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#vector-register-convention[psABI: Vector Register Convention] + +^10^https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf[The RISC-V Instruction Set Manual: 8.2 Floating-Point Control and Status Register] + +^11^Section 3.5 (Vector Length Register) in the specification ^0^ + +^12^Section 3.4.2 in the specification ^0^ + +^13^Section 
11.13, 11.14, 13.6, 13.7 in the specification ^0^ + +^14^Section 4.5 (Mask Register Layout) in the specification ^0^ + +^15^Section 7.5 in the specification ^0^ + +^16^Section 7.8 in the specification ^0^ + +^17^Section 5.2 (Vector Operands) in the specification ^0^ + +^18^Section 6 (Configuration-Setting Instructions) in the specification ^0^ + +^19^Section 18 (Standard Vector Extensions) in the specification ^0^ + +^20^Section 18.2 (Zve*: Vector Extensions for Embedded Processors) in the specification ^0^ + +^21^Section 12 (Vector Fixed-Point Arithmetic Instructions) in the specification ^0^ + +^22^Section 3.9 (Vector Fixed-Point Saturation Flag `vxsat`) in the specification ^0^ + +^23^Section 13 (Vector Floating-Point Instructions) in the specification ^0^ + +^24^Section 16.3.1 (Vector Slideup Instructions) in the specification ^0^ + +^25^Section 3.7 (Vector Start Index CSR `vstart`) in the specification ^0^ + +^26^https://github.com/riscv/riscv-isa-manual/pull/1088[riscv/riscv-isa-manual#1088] + +^27^Section 6.3 (Constraints on Setting `vl`) in the specification ^0^ + +^28^Section 6.4 (Example of stripmining and changes to SEW) in the specification ^0^ + +^29^Section 3.6 (Vector Byte Length `vlenb`) in the specification ^0^ + +^30^Section 16.6 (Whole Vector Register Move) in the specification ^0^ + +^31^https://github.com/riscv/riscv-bfloat16/releases[RISC-V BFloat16 Specification] \ No newline at end of file diff --git a/doc/vector-bfloat16-spec.adoc b/doc/vector-bfloat16-spec.adoc new file mode 100644 index 000000000..77d041a45 --- /dev/null +++ b/doc/vector-bfloat16-spec.adoc @@ -0,0 +1,45 @@ +== Intrinsics for BFloat16 (Brain Float 16) instruction set extensions + +The RISC-V vector C intrinsics support intrinsics that expose the control of BFloat16 (Brain Float 16) instruction set extensions ^31^.
+ +[[bf16-naming-scheme]] +=== Naming scheme + +The BFloat16 intrinsics follow the naming scheme defined under <>, with `bf` as the abbreviation for BFloat16 types in the function suffix. + +[[bf16-vector-programming-model]] +=== Control of the vector extension programming model + +The BFloat16 intrinsics provide the same control of the vector programming model defined under <>. Intrinsics that represent BFloat16 instructions that are affected by `frm` (`vfncvtbf16.f.f.w` and `vfwmaccbf16`) follow what is defined under <> and provide variants of <> and <>. + +[[bf16-type-system]] +=== Type system + +Floating-point types have EEW and EMUL encoded into the type. The first row describes the EMUL and the first column describes the data type and element width of the scalar type. + +Floating-point types with element widths of 16 (Types=`__bf16`) require the `zfbfmin` and `zvfbfmin` extensions to be specified in the architecture. + +NOTE: Although C++23 introduces `<stdfloat>` for fixed-width floating-point types, this latest standard is not yet supported in the upstream RISC-V compiler. The specification (along with the prototype lists in appendix) uses `__bf16` to represent the BFloat16 floating-point type.
+ +.BFloat16 types +[options="autowidth,header",float="center",align="center",cols="<1,<2,<2,<2,<2,<2,<2,<2"] +|=== +| Types | EMUL=1/8 | EMUL=1/4 | EMUL=1/2 | EMUL=1 | EMUL=2 | EMUL=4 | EMUL=8 +| __bf16 | N/A | vbfloat16mf4_t | vbfloat16mf2_t | vbfloat16m1_t | vbfloat16m2_t | vbfloat16m4_t | vbfloat16m8_t +|=== + +.Tuple types +[options="autowidth,header",float="center",align="center",cols="<1,<2,<2,<2,<2,<2,<2,<2"] +|=== +| Non-tuple Types (NFIELD=1) | NFIELD=2 | NFIELD=3 | NFIELD=4 | NFIELD=5 | NFIELD=6 | NFIELD=7 | NFIELD=8 +| vbfloat16mf4_t | vbfloat16mf4x2_t | vbfloat16mf4x3_t | vbfloat16mf4x4_t | vbfloat16mf4x5_t | vbfloat16mf4x6_t | vbfloat16mf4x7_t | vbfloat16mf4x8_t +| vbfloat16mf2_t | vbfloat16mf2x2_t | vbfloat16mf2x3_t | vbfloat16mf2x4_t | vbfloat16mf2x5_t | vbfloat16mf2x6_t | vbfloat16mf2x7_t | vbfloat16mf2x8_t +| vbfloat16m1_t | vbfloat16m1x2_t | vbfloat16m1x3_t | vbfloat16m1x4_t | vbfloat16m1x5_t | vbfloat16m1x6_t | vbfloat16m1x7_t | vbfloat16m1x8_t +| vbfloat16m2_t | vbfloat16m2x2_t | vbfloat16m2x3_t | vbfloat16m2x4_t | N/A | N/A | N/A | N/A +| vbfloat16m4_t | vbfloat16m4x2_t | N/A | N/A | N/A | N/A | N/A | N/A +|=== + +[[bf16-pseudo-intrinsics]] +=== Pseudo intrinsics + +The RISC-V vector BFloat16 types (provided under <>) also have pseudo intrinsic variants from <> to help variable declaration and manipulation across intrinsic types.
diff --git a/rvv-intrinsic-generator/Makefile b/rvv-intrinsic-generator/Makefile index 3d26481ae..5044f51ff 100644 --- a/rvv-intrinsic-generator/Makefile +++ b/rvv-intrinsic-generator/Makefile @@ -51,6 +51,8 @@ PYTHONPATHS = $(RVV_INTRINSIC_GEN_PATH):$(ABS_VENDOR_PATH) PY3 := PYTHONPATH=$$PYTHONPATH:$(PYTHONPATHS) python3 # Main entry script of the generator MAIN := rvv_intrinsic_gen.main +# BFloat16 instruction scripts +BF16_INST := $(RVV_INTRINSIC_GEN_PATH)/bfloat16_inst.py # Script to clang-format the auto-generated adoc files CLANG_FORMAT_ADOC = clang_format_autogen # Main output directory is default to auto-generated @@ -60,6 +62,10 @@ OUTPUT_DIR := ../auto-generated DIR := $(abspath $(OUTPUT_DIR)) # Output directory for policy intrinsics POLICY_DIR := $(DIR)/policy_funcs +# Output directory for bfloat16 non-policy intrinsics +BF16_DIR := $(DIR)/bfloat16 +# Output directory for bfloat16 policy intrinsics +BF16_POLICY_DIR := $(BF16_DIR)/policy_funcs # Directory that stores the v0.10 unit tests LEGACY_API_TESTS_DIR := $(abspath ../legacy-api-unit-tests) # Derived variable to trigger option --vendor-inst @@ -140,15 +146,20 @@ endef # If VENDOR_GENERATOR_SCRIPT is defined, also trigger it in all. 
# NOTE: A possible enhancement to this is allow multiple targets be added here ifdef VENDOR_GENERATOR_SCRIPT -all: gen-document gen-test gen-compatible-header vendor-generator +all: gen-document gen-test gen-compatible-header bf16-all vendor-generator else -all: gen-document gen-test gen-compatible-header +all: gen-document gen-test gen-compatible-header bf16-all endif +bf16-all: gen-bf16-document gen-bf16-test + gen-document: non-overloaded-doc non-overloaded-docs overloaded-doc overloaded-docs +gen-bf16-document: bf16-non-overloaded-doc bf16-non-overloaded-docs bf16-overloaded-doc bf16-overloaded-docs gen-test: non-overloaded-test overloaded-test gen-llvm-test gen-gnu-test +gen-bf16-test: bf16-non-overloaded-test bf16-overloaded-test gen-bf16-llvm-test gen-compatible-header: non-policy-compatible-header policy-compatible-header non-policy-overloaded-compatible-header policy-overloaded-compatible-header gen-llvm-test: llvm-non-overloaded-test llvm-overloaded-test +gen-bf16-llvm-test: bf16-llvm-non-overloaded-test bf16-llvm-overloaded-test gen-gnu-test: gnu-overloaded-test gnu-non-overloaded-test # Generate all-in-one document for non-overloaded intrinsics @@ -221,6 +232,64 @@ gnu-overloaded-test: $(call gen_tests,$(DIR)/gnu-overloaded-tests,overloaded-test,--toolchain-type gnu) $(call gen_tests,$(POLICY_DIR)/gnu-overloaded-tests,overloaded-test,--toolchain-type gnu --has-policy) +# BFloat16 documents +bf16-non-overloaded-doc: + $(call gen_doc, $(BF16_DIR),intrinsic_funcs.adoc,non-overloaded-doc,--skip-default-inst --vendor-inst $(BF16_INST)) + $(call gen_doc, $(BF16_POLICY_DIR),intrinsic_funcs.adoc,non-overloaded-doc,--has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + $(call clang_format_adoc, --file, $(BF16_DIR)/intrinsic_funcs.adoc) + $(call clang_format_adoc, --file, $(BF16_POLICY_DIR)/intrinsic_funcs.adoc) + +bf16-non-overloaded-docs: + $(call gen_doc, $(BF16_DIR),intrinsic_funcs,non-overloaded-docs,--skip-default-inst --vendor-inst $(BF16_INST)) + 
$(call gen_doc, $(BF16_POLICY_DIR),intrinsic_funcs,non-overloaded-docs,--has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + $(call clang_format_adoc, --folder, $(BF16_DIR)/intrinsic_funcs) + $(call clang_format_adoc, --folder, $(BF16_POLICY_DIR)/intrinsic_funcs) + +bf16-overloaded-doc: + $(call gen_doc, $(BF16_DIR),overloaded_intrinsic_funcs.adoc,overloaded-doc,--skip-default-inst --vendor-inst $(BF16_INST)) + $(call gen_doc, $(BF16_POLICY_DIR),overloaded_intrinsic_funcs.adoc,overloaded-doc,--has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + $(call clang_format_adoc, --file, $(BF16_DIR)/overloaded_intrinsic_funcs.adoc) + $(call clang_format_adoc, --file, $(BF16_POLICY_DIR)/overloaded_intrinsic_funcs.adoc) + +bf16-overloaded-docs: + $(call gen_doc, $(BF16_DIR),overloaded_intrinsic_funcs,overloaded-docs,--skip-default-inst --vendor-inst $(BF16_INST)) + $(call gen_doc, $(BF16_POLICY_DIR),overloaded_intrinsic_funcs,overloaded-docs,--has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + $(call clang_format_adoc, --folder, $(BF16_DIR)/overloaded_intrinsic_funcs) + $(call clang_format_adoc, --folder, $(BF16_POLICY_DIR)/overloaded_intrinsic_funcs) + +# BFloat16 tests +# Generate non-overloaded intrinsic testing C source files +bf16-non-overloaded-test: + $(call gen_tests,$(BF16_DIR)/api-testing,non-overloaded-test,--skip-default-inst --vendor-inst $(BF16_INST)) + $(call gen_tests,$(BF16_POLICY_DIR)/api-testing,non-overloaded-test,--has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + clang-format -i $(BF16_DIR)/api-testing/* + clang-format -i $(BF16_POLICY_DIR)/api-testing/* + +# Generate overloaded intrinsic testing C source files +bf16-overloaded-test: + $(call gen_tests,$(BF16_DIR)/overloaded-api-testing,overloaded-test,--skip-default-inst --vendor-inst $(BF16_INST)) + $(call gen_tests,$(BF16_POLICY_DIR)/overloaded-api-testing,overloaded-test,--has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + clang-format -i 
$(BF16_DIR)/overloaded-api-testing/* + clang-format -i $(BF16_POLICY_DIR)/overloaded-api-testing/* + +# Generate non-overloaded intrinsic testing C source files +bf16-llvm-non-overloaded-test: + $(call gen_tests,$(BF16_DIR)/llvm-api-tests,non-overloaded-test,--toolchain-type llvm --skip-default-inst --vendor-inst $(BF16_INST)) + $(call gen_tests,$(BF16_POLICY_DIR)/llvm-api-tests,non-overloaded-test,--toolchain-type llvm --has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + $(call replace_float, $(BF16_DIR)/llvm-api-tests) + $(call replace_float, $(BF16_POLICY_DIR)/llvm-api-tests) + clang-format -i $(BF16_DIR)/llvm-api-tests/* + clang-format -i $(BF16_POLICY_DIR)/overloaded-api-testing/* + +# Generate overloaded intrinsic testing C source files +bf16-llvm-overloaded-test: + $(call gen_tests,$(BF16_DIR)/llvm-overloaded-tests,overloaded-test,--toolchain-type llvm --skip-default-inst --vendor-inst $(BF16_INST)) + $(call gen_tests,$(BF16_POLICY_DIR)/llvm-overloaded-tests,overloaded-test,--toolchain-type llvm --has-policy --skip-default-inst --vendor-inst $(BF16_INST)) + $(call replace_float, $(BF16_DIR)/llvm-overloaded-tests) + $(call replace_float, $(BF16_POLICY_DIR)/llvm-overloaded-tests) + clang-format -i $(BF16_DIR)/llvm-overloaded-tests/* + clang-format -i $(BF16_POLICY_DIR)/llvm-overloaded-tests/* + # Generate the adaptor header for v0.10 non-policy-compatible-header: $(call gen_doc,$(DIR)/rvv-v0p10-compatible-headers,non-policy.h,non-overloaded-compatible-header,) @@ -251,18 +320,32 @@ git-commit-all: make git-commit-autogen-doc OUTPUT_DIR=${OUTPUT_DIR} make git-commit-autogen-test OUTPUT_DIR=${OUTPUT_DIR} +git-commit-bf16-all: + make git-commit-autogen-bf16-doc OUTPUT_DIR=${OUTPUT_DIR} + make git-commit-autogen-bf16-test OUTPUT_DIR=${OUTPUT_DIR} + # Update and commit all documents under auto-generated git-commit-autogen-doc: make gen-document OUTPUT_DIR=${OUTPUT_DIR} git add ${DIR}/* git commit -m "[Auto-gen] Update documents under ${OUTPUT_DIR}. 
(make git-commit-autogen-doc)" +git-commit-autogen-bf16-doc: + make gen-bf16-document OUTPUT_DIR=${OUTPUT_DIR} + git add ${BF16_DIR}/* + git commit -m "[Auto-gen] Update bfloat16 documents under ${OUTPUT_DIR}. (make git-commit-autogen-bf16-doc)" + # Update and commit all testing C source files under auto-generated git-commit-autogen-test: make gen-test git add ${DIR}/* git commit -m "[Auto-gen] Update tests under ${OUTPUT_DIR}. (make git-commit-autogen-test)" +git-commit-autogen-bf16-test: + make gen-bf16-test + git add ${BF16_DIR}/* + git commit -m "[Auto-gen] Update bfloat16 tests under ${OUTPUT_DIR}. (make git-commit-autogen-bf16-test)" + # Update and commit compatible headers under auto-generated git-commit-autogen-compatible-header: make gen-compatible-header diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/bfloat16_inst.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/bfloat16_inst.py new file mode 100644 index 000000000..a0f4925fc --- /dev/null +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/bfloat16_inst.py @@ -0,0 +1,161 @@ +""" +-------------------------------------------------------------------------------- +Copyright 2023 SiFive Inc + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +-------------------------------------------------------------------------------- + +Declares the BFloat16 intrinsics and links to the templates for its +realization into function prototype. The documents are generated under the +sequence and grouping. 
+""" + +from intrinsic_decorator import IntrinsicDecorators +from generator import CompatibleHeaderGenerator +from templates import load_template +from templates import seg_load_template +from templates import store_template +from templates import seg_store_template +from templates import reint_op_template +from templates import get_set_diff_lmul_op_template +from templates import misc_op_template +from templates import cvt_op_template +from templates import mac_template +from constants import LMULS, WLMULS, NCVTLMULS + +SEWS = [16] +NSEWS = [32] +TYPES = ["bfloat"] + + +def gen(g): + if isinstance(g, CompatibleHeaderGenerator): + assert False, "BFloat16 intrinsics is supported after v1.0" + decorators = IntrinsicDecorators(g.has_tail_policy) + + #################################################################### + g.start_group("BFloat16 Vector Loads and Stores Intrinsics") + + g.function_group(load_template, "Vector Unit-Stride Load Intrinsics", + "bf16-vector-unit-stride-load", ["vle"], TYPES, SEWS, LMULS, + decorators.has_masking_maskedoff_policy) + + g.function_group(store_template, "Vector Unit-Stride Store Intrinsics", + "bf16-vector-unit-stride-store", ["vse"], TYPES, SEWS, LMULS, + decorators.has_masking_no_maskedoff) + + g.function_group(load_template, "Vector Strided Load Intrinsics", + "vector-strided-load", ["vlse"], TYPES, SEWS, LMULS, + decorators.has_masking_maskedoff_policy) + + g.function_group(store_template, "Vector Strided Store Intrinsics", + "vector-strided-store", ["vsse"], TYPES, SEWS, LMULS, + decorators.has_masking_no_maskedoff) + + g.function_group(load_template, "Vector Indexed Load Intrinsics", + "vector-indexed-load", ["vloxei", "vluxei"], TYPES, SEWS, + LMULS, decorators.has_masking_maskedoff_policy) + + g.function_group(store_template, "Vector Indexed Store Intrinsics", + "vector-indexed-store", ["vsoxei", "vsuxei"], TYPES, SEWS, + LMULS, decorators.has_masking_no_maskedoff) + + g.function_group(load_template, + "Unit-stride 
Fault-Only-First Loads Intrinsics", + "unit-stride-fault-only-first-loads", ["vleff"], TYPES, SEWS, + LMULS, decorators.has_masking_maskedoff_policy) + + #################################################################### + g.start_group("BFloat16 Vector Loads and Stores Segment Intrinsics") + + g.function_group(seg_load_template, + "Vector Unit-Stride Segment Load Intrinsics", + "vector-unit-stride-segment-load", ["vlseg", "vlsegff"], + TYPES, SEWS, LMULS, decorators.has_masking_maskedoff_policy) + + g.function_group(seg_store_template, + "Vector Unit-Stride Segment Store Intrinsics", + "vecrtor-unit-stride-segment-store", ["vsseg"], TYPES, SEWS, + LMULS, decorators.has_masking_no_maskedoff) + + g.function_group(seg_load_template, "Vector Strided Segment Load Intrinsics", + "vector-strided-segment-load", ["vlsseg"], TYPES, SEWS, + LMULS, decorators.has_masking_maskedoff_policy) + + g.function_group(seg_store_template, + "Vector Strided Segment Store Intrinsics", + "vector-strided-segment-store", ["vssseg"], TYPES, SEWS, + LMULS, decorators.has_masking_no_maskedoff) + + g.function_group(seg_load_template, "Vector Indexed Segment Load Intrinsics", + "vector-indexed-segment-load", ["vloxseg", "vluxseg"], TYPES, + SEWS, LMULS, decorators.has_masking_maskedoff_policy) + + g.function_group(seg_store_template, + "Vector Indexed Segment Store Intrinsics", + "vector-indexed-segment-store", ["vsoxseg", "vsuxseg"], + TYPES, SEWS, LMULS, decorators.has_masking_no_maskedoff) + + #################################################################### + g.start_group("BFloat16 Convert Intrinsics") + + g.function_group(cvt_op_template, "Vector Narrowing Convert Intrinsics", + "bf16-vector-narrow-convert", ["ncvtbf16"], "bfloat16", + NSEWS, NCVTLMULS, + decorators.has_masking_maskedoff_policy_frm) + + g.function_group(cvt_op_template, "Vector Widening Convert Intrinsics", + "bf16-vector-widening-convert", ["wcvtbf16"], "bfloat16", + SEWS, WLMULS, 
decorators.has_masking_maskedoff_policy) + + #################################################################### + g.start_group("BFloat16 Arithmetic Intrinsics") + + g.function_group(mac_template, + "Vector Widening Multiply-Accumulate Intrinsics", + "bf16-widening-multiply-accumulate", ["wmaccbf16"], TYPES, + SEWS, WLMULS, decorators.has_masking_no_maskedoff_policy_frm) + + #################################################################### + g.start_group("BFloat16 Miscellaneous Vector Utility Intrinsics") + + g.function_group(reint_op_template, "Reinterpret Cast Conversion Intrinsics", + "reinterpret-cast-conversion", ["reinterpret"], "bfloat16", + SEWS, LMULS, decorators.has_no_masking) + + g.function_group(misc_op_template, "Vector LMUL Extension Intrinsics", + "vector-lmul-extensionn", ["vlmul_ext_v"], TYPES, SEWS, + LMULS, decorators.has_no_masking) + + g.function_group(misc_op_template, "Vector LMUL Truncation Intrinsics", + "vector-lmul-truncation", ["vlmul_trunc_v"], TYPES, SEWS, + LMULS, decorators.has_no_masking) + + g.function_group(misc_op_template, "Vector Initialization Intrinsics", + "vector-initialization", ["vundefined"], TYPES, SEWS, LMULS, + decorators.has_no_masking) + + g.function_group(get_set_diff_lmul_op_template, "Vector Insertion Intrinsics", + "vector-insertion", ["vset"], TYPES, SEWS, LMULS, + decorators.has_no_masking) + + g.function_group(get_set_diff_lmul_op_template, + "Vector Extraction Intrinsics", "vector-extraction", + ["vget"], TYPES, SEWS, LMULS, decorators.has_no_masking) + + g.function_group(misc_op_template, "Vector Creation Intrinsics", + "vector-creation", ["vcreate"], TYPES, SEWS, LMULS, + decorators.has_no_masking) + + #################################################################### + g.gen_prologue() diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/generator.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/generator.py index 6acf8402f..517904f4c 100644 --- 
a/rvv-intrinsic-generator/rvv_intrinsic_gen/generator.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/generator.py @@ -82,6 +82,7 @@ def func_name(name): name = name.replace("_int", "_i") name = name.replace("_float", "_f") name = name.replace("_bool", "_b") + name = name.replace("_bfloat", "_bf") # Follows the naming guideline under riscv-c-api-doc to add the `__riscv_` # suffix for all RVV intrinsics. name = "__riscv_" + name @@ -442,7 +443,7 @@ def __init__(self, f, is_overloaded, toolchain_type, has_tail_policy): # different op name self.test_file_names = [] - def write_file_header(self, has_float_type): + def write_file_header(self, has_float_type, has_bfloat16_type): #pylint: disable=line-too-long int_llvm_header = (r"""// REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone \ @@ -456,6 +457,14 @@ def write_file_header(self, has_float_type): // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s +""") + bfloat16_llvm_header = (r"""// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ +// RUN: -target-feature +experimental-zvfbfmin \ +// RUN: -target-feature +experimental-zvfbfwma -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + """) gnu_header = ( r"""/* { dg-do compile } */ @@ -464,7 +473,9 @@ def write_file_header(self, has_float_type): """) if self.toolchain_type == ToolChainType.LLVM: - if has_float_type: + if has_bfloat16_type: + self.fd.write(bfloat16_llvm_header) + elif has_float_type: self.fd.write(float_llvm_header) else: self.fd.write(int_llvm_header) @@ -527,6 +538,7 @@ def func(self, inst_info, name, return_type, **kwargs): # righteously, there should be a function to determine if an intrinsic # has a floating-point variant and have the header emission depend on it. 
has_float_type = func_decl.find("vfloat") != -1 + has_bfloat16_type = func_decl.find("bf16") != -1 # NOTE(FIXME): This is logic as a hard fix to test case header emission. has_float_type_variant_inst = [ "macc", "nmacc", "msac", "nmsac", "madd", "nmadd", "msub", "nmsub", @@ -539,7 +551,7 @@ def func(self, inst_info, name, return_type, **kwargs): has_float_type = True if header: - self.write_file_header(has_float_type) + self.write_file_header(has_float_type, has_bfloat16_type) def output_call_arg(arg_name, type_name): if ((name.startswith("vget") or name.startswith("vset")) \ diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/cvt_op_template.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/cvt_op_template.py index 48b0a62e7..512a7fe75 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/cvt_op_template.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/cvt_op_template.py @@ -40,6 +40,13 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): # [dst_type, dst_type_short, src_type, src_type_short] if type_list == ITYPES: convert_set = [["int", "x", "int", "x"], ["uint", "x", "uint", "x"]] + elif type_list == "bfloat16": + if "ncvtbf16" in op_list: + convert_set = [["bfloat", "bf", "float", "f"]] + elif "wcvtbf16" in op_list: + convert_set = [["float", "f", "bfloat", "bf"]] + else: + assert False, "Unhandled instruction with type_list = 'bfloat16'" else: convert_set = [["int", "x", "float", "f"], ["uint", "xu", "float", "f"], ["float", "f", "int", "x"], ["float", "f", "uint", "xu"], @@ -63,7 +70,7 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): # A double-width IEEE floating-point value can always represent a # single-width IEEE floating-point value exactly. 
# So we don't need frm variant for vfwcvt.f.f, and vfwcvt.f.x(u) here - if op == "wcvt" and decorator.flags & ExtraAttr.HAS_FRM and\ + if "wcvt" in op and decorator.flags & ExtraAttr.HAS_FRM and\ (args["TYPES0"] == args["TYPES2"] or\ ("float" in args["TYPES0"] and "int" in args["TYPES2"])): continue @@ -75,16 +82,16 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): args["MIDDLE"] = "v" factor = "" - if op == "wcvt": + if "wcvt" in op: factor = "W" - if op == "ncvt": + if "ncvt" in op: factor = "N" args["MIDDLE"] = "w" args["LLMUL"] = args[factor + "LMUL"] args["LSEW"] = args[factor + "SEW"] - if args["TYPES1"] == "f" or args["TYPES3"] == "f": + if "f" in args["TYPES1"] or "f" in args["TYPES3"]: args["OP"] = "f" + args["OP"] if args["TYPES0"] == "uint": @@ -115,9 +122,17 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): if not type_helper.valid_vtype(dst_type) or\ not type_helper.valid_vtype(src_type): continue - func_name = \ - "{OP}_{TYPES1}_{TYPES3}_{MIDDLE}_{D_TYPE}{LSEW}m{LLMUL}".format_map\ - (args) + if type_list == "bfloat16": + if "ncvt" in args["OP"]: + func_name = "{OP}_f_f_w_bf{LSEW}m{LLMUL}".format_map(args) + elif "wcvt" in args["OP"]: + func_name = "{OP}_f_f_v_f{LSEW}m{LLMUL}".format_map(args) + else: + assert False, "Unhandled instruction for bfloat16 type" + else: + func_name = \ + "{OP}_{TYPES1}_{TYPES3}_{MIDDLE}_{D_TYPE}{LSEW}m{LLMUL}".format_map\ + (args) G.func( inst_info, name=func_name + decorator.func_suffix, @@ -134,6 +149,10 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): if decorator.flags & ExtraAttr.HAS_FRM: continue + # BFloat16 converts do not have `_rod`/`_rtz` instructions + if type_list == "bfloat16": + continue + if args["TYPES1"] != args["TYPES3"] and args["TYPES3"] == "f": args["OP"] = args["OP"] + "_rtz" inst_info = InstInfo.get( diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/mac_template.py 
b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/mac_template.py index 0900eda42..da18f5c0a 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/mac_template.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/mac_template.py @@ -41,7 +41,7 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): if "int" in data_type and decorator.flags & ExtraAttr.HAS_FRM: continue - if data_type == "float": + if "float" in data_type: args["S_TYPE"] = "f" args["OP"] = "f" + op inst_type = InstType.VVF @@ -129,14 +129,22 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): rs1=type_helper.s, vs2=type_helper.v, vl=type_helper.size_t) - elif data_type == "float" and "w" in op: + elif "float" in data_type and "w" in op: + # Vector BF16 widening multiply-accumulate computes into FP32 values + if args["TYPE"] == "bfloat": + args["TYPE"] = "float" + dst_type_helper = TypeHelper(**args) + dst_type = dst_type_helper.wv + else: + dst_type = type_helper.wv + G.func( inst_info_vv, name="{OP}_vv_{TYPE}{WSEW}m{WLMUL}".format_map(args) + decorator.func_suffix, - return_type=type_helper.wv, + return_type=dst_type, **decorator.mask_args(type_helper.m, type_helper.v), - vd=type_helper.wv, + vd=dst_type, vs1=type_helper.v, vs2=type_helper.v, **decorator.extra_csr_args(type_helper.uint), @@ -145,9 +153,9 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): inst_info_vs, name="{OP}_v{S_TYPE}_{TYPE}{WSEW}m{WLMUL}".format_map(args) + decorator.func_suffix, - return_type=type_helper.wv, + return_type=dst_type, **decorator.mask_args(type_helper.m, type_helper.v), - vd=type_helper.wv, + vd=dst_type, vs1=type_helper.s, vs2=type_helper.v, **decorator.extra_csr_args(type_helper.uint), diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/misc_op_template.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/misc_op_template.py index 9d38a0a9b..95b9a29ec 100644 --- 
a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/misc_op_template.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/misc_op_template.py @@ -106,7 +106,10 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): continue type_helper = TypeHelper(**args) inst_info = InstInfo.get(args, decorator, inst_type) - args["TYPE1"] = args["TYPE"][0] + if args["TYPE"] == "bfloat": + args["TYPE1"] = args["TYPE"][0:2] + else: + args["TYPE1"] = args["TYPE"][0] func_name = "{OP}_{TYPE1}{SEW}m{LMUL}_{TYPE1}{SEW}m{DST_LMUL}".format_map( args) diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/reint_op_template.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/reint_op_template.py index 1f67b5a7a..452cec078 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/reint_op_template.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/reint_op_template.py @@ -30,8 +30,6 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): #pylint: disable=invalid-name, unused-argument # FIXME: Renaming 'G' to 'g' all in once later. - # FIXME: Argument 'type_list' is unused but required for interface - # consistency. We can prune it in the future. 
G.inst_group_prologue() for decorator in decorator_list: decorator.write_text_header(G) @@ -39,9 +37,15 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): G.write("// Reinterpret between different type under the same SEW/LMUL\n") # Variable in list means # [dst type, dst short type, src type, src short type] - convert_set = [["float", "f", "int", "i"], ["float", "f", "uint", "u"], - ["uint", "u", "int", "i"], ["int", "i", "uint", "u"], - ["int", "i", "float", "f"], ["uint", "u", "float", "f"]] + if type_list == "bfloat16": + convert_set = [["bfloat", "bf", "int", + "i"], ["bfloat", "bf", "uint", "u"], + ["int", "i", "bfloat", "bf"], + ["uint", "u", "bfloat", "bf"]] + else: + convert_set = [["float", "f", "int", "i"], ["float", "f", "uint", "u"], + ["uint", "u", "int", "i"], ["int", "i", "uint", "u"], + ["int", "i", "float", "f"], ["uint", "u", "float", "f"]] for args in prod( OP=op_list, SEW=sew_list, TYPES=convert_set, LMUL=lmul_list): @@ -73,6 +77,10 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): **decorator.mask_args(type_helper.m, rt), src=src_type) + # Bfloat16 reinterpretations do not have variants below + if type_list == "bfloat16": + continue + G.write("// Reinterpret between different SEW under the same LMUL\n") # Variable in list means # [dst type, dst short type, src type, src short type] diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/utils.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/utils.py index 190b0b426..6433eff12 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/utils.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/utils.py @@ -139,6 +139,9 @@ def s(self): return "double" else: assert False, "Unhandled SEW under float type" + if self.args["TYPE"] == "bfloat": + assert self.args["SEW"] == 16, "BFloat16 only, no other SEW allowed" + return "__bf16" return "{TYPE}{SEW}_t".format_map(self.args) @property