Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Define BFloat16 intrinsics #293

Merged
merged 18 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
c4c690e
Add specifications for the bfloat16 intrinsics
eopXD Nov 1, 2023
c54ed9c
Define intrinsics that enables the use of BFloat16 types
eopXD Nov 4, 2023
c4cd5d0
[Makefile] Add BFloat16 targets
eopXD Nov 4, 2023
a05a648
[Auto-gen] Update bfloat16 documents under ../auto-generated. (make g…
eopXD Nov 4, 2023
bb01132
[Auto-gen] Update bfloat16 tests under ../auto-generated. (make git-c…
eopXD Nov 4, 2023
0b8d701
Define BFloat16 convert intrinsics
eopXD Nov 4, 2023
0a79ff1
[Auto-gen] Update bfloat16 documents under ../auto-generated. (make g…
eopXD Nov 4, 2023
47a7d79
[Auto-gen] Update bfloat16 tests under ../auto-generated. (make git-c…
eopXD Nov 4, 2023
89a2df2
Define BFloat16 widening-accumulate intrinsics
eopXD Nov 4, 2023
342894b
[Auto-gen] Update bfloat16 documents under ../auto-generated. (make g…
eopXD Nov 4, 2023
85ea993
[Auto-gen] Update bfloat16 tests under ../auto-generated. (make git-c…
eopXD Nov 4, 2023
d295023
Add note that specification uses __bf16 to represent scalar BFloat16 …
eopXD Nov 4, 2023
63dbf0c
Add tuple types table for BFloat16 types
eopXD Nov 4, 2023
34b8726
Fix type abbreviation in reinterpret intrinsics for bfloat16
eopXD Nov 11, 2023
28a6e83
[Auto-gen] Update bfloat16 documents under ../auto-generated. (make g…
eopXD Nov 11, 2023
0a478ed
[Auto-gen] Update bfloat16 tests under ../auto-generated. (make git-c…
eopXD Nov 11, 2023
22bccf1
Support bfloat16 in floating point test case header
4vtomat Apr 23, 2024
849dafc
Handle bfloat16 in misc_op_template.py
4vtomat Apr 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
177 changes: 177 additions & 0 deletions auto-generated/bfloat16/api-testing/vcreate.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#include <riscv_vector.h>
#include <stdint.h>

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16m1_bf16m2 and
 * returns the vbfloat16m2_t it produces. */
vbfloat16m2_t test_vcreate_v_bf16m1_bf16m2(vbfloat16m1_t v0, vbfloat16m1_t v1) {
  vbfloat16m2_t created = __riscv_vcreate_v_bf16m1_bf16m2(v0, v1);
  return created;
}

/* API test: forwards v0..v3 to __riscv_vcreate_v_bf16m1_bf16m4 and returns
 * the vbfloat16m4_t it produces. */
vbfloat16m4_t test_vcreate_v_bf16m1_bf16m4(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3) {
  vbfloat16m4_t created = __riscv_vcreate_v_bf16m1_bf16m4(v0, v1, v2, v3);
  return created;
}

/* API test: forwards v0..v7 to __riscv_vcreate_v_bf16m1_bf16m8 and returns
 * the vbfloat16m8_t it produces. */
vbfloat16m8_t test_vcreate_v_bf16m1_bf16m8(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3,
    vbfloat16m1_t v4, vbfloat16m1_t v5, vbfloat16m1_t v6, vbfloat16m1_t v7) {
  vbfloat16m8_t created =
      __riscv_vcreate_v_bf16m1_bf16m8(v0, v1, v2, v3, v4, v5, v6, v7);
  return created;
}

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16m2_bf16m4 and
 * returns the vbfloat16m4_t it produces. */
vbfloat16m4_t test_vcreate_v_bf16m2_bf16m4(vbfloat16m2_t v0, vbfloat16m2_t v1) {
  vbfloat16m4_t created = __riscv_vcreate_v_bf16m2_bf16m4(v0, v1);
  return created;
}

/* API test: forwards v0..v3 to __riscv_vcreate_v_bf16m2_bf16m8 and returns
 * the vbfloat16m8_t it produces. */
vbfloat16m8_t test_vcreate_v_bf16m2_bf16m8(
    vbfloat16m2_t v0, vbfloat16m2_t v1, vbfloat16m2_t v2, vbfloat16m2_t v3) {
  vbfloat16m8_t created = __riscv_vcreate_v_bf16m2_bf16m8(v0, v1, v2, v3);
  return created;
}

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16m4_bf16m8 and
 * returns the vbfloat16m8_t it produces. */
vbfloat16m8_t test_vcreate_v_bf16m4_bf16m8(vbfloat16m4_t v0, vbfloat16m4_t v1) {
  vbfloat16m8_t created = __riscv_vcreate_v_bf16m4_bf16m8(v0, v1);
  return created;
}

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16mf4x2 and returns
 * the vbfloat16mf4x2_t tuple it produces. */
vbfloat16mf4x2_t test_vcreate_v_bf16mf4x2(vbfloat16mf4_t v0,
                                          vbfloat16mf4_t v1) {
  vbfloat16mf4x2_t tuple = __riscv_vcreate_v_bf16mf4x2(v0, v1);
  return tuple;
}

/* API test: forwards v0..v2 to __riscv_vcreate_v_bf16mf4x3 and returns the
 * vbfloat16mf4x3_t tuple it produces. */
vbfloat16mf4x3_t test_vcreate_v_bf16mf4x3(
    vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2) {
  vbfloat16mf4x3_t tuple = __riscv_vcreate_v_bf16mf4x3(v0, v1, v2);
  return tuple;
}

/* API test: forwards v0..v3 to __riscv_vcreate_v_bf16mf4x4 and returns the
 * vbfloat16mf4x4_t tuple it produces. */
vbfloat16mf4x4_t test_vcreate_v_bf16mf4x4(
    vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2,
    vbfloat16mf4_t v3) {
  vbfloat16mf4x4_t tuple = __riscv_vcreate_v_bf16mf4x4(v0, v1, v2, v3);
  return tuple;
}

/* API test: forwards v0..v4 to __riscv_vcreate_v_bf16mf4x5 and returns the
 * vbfloat16mf4x5_t tuple it produces. */
vbfloat16mf4x5_t test_vcreate_v_bf16mf4x5(
    vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2,
    vbfloat16mf4_t v3, vbfloat16mf4_t v4) {
  vbfloat16mf4x5_t tuple = __riscv_vcreate_v_bf16mf4x5(v0, v1, v2, v3, v4);
  return tuple;
}

/* API test: forwards v0..v5 to __riscv_vcreate_v_bf16mf4x6 and returns the
 * vbfloat16mf4x6_t tuple it produces. */
vbfloat16mf4x6_t test_vcreate_v_bf16mf4x6(
    vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2,
    vbfloat16mf4_t v3, vbfloat16mf4_t v4, vbfloat16mf4_t v5) {
  vbfloat16mf4x6_t tuple =
      __riscv_vcreate_v_bf16mf4x6(v0, v1, v2, v3, v4, v5);
  return tuple;
}

/* API test: forwards v0..v6 to __riscv_vcreate_v_bf16mf4x7 and returns the
 * vbfloat16mf4x7_t tuple it produces. */
vbfloat16mf4x7_t test_vcreate_v_bf16mf4x7(
    vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2,
    vbfloat16mf4_t v3, vbfloat16mf4_t v4, vbfloat16mf4_t v5,
    vbfloat16mf4_t v6) {
  vbfloat16mf4x7_t tuple =
      __riscv_vcreate_v_bf16mf4x7(v0, v1, v2, v3, v4, v5, v6);
  return tuple;
}

/* API test: forwards v0..v7 to __riscv_vcreate_v_bf16mf4x8 and returns the
 * vbfloat16mf4x8_t tuple it produces. */
vbfloat16mf4x8_t test_vcreate_v_bf16mf4x8(
    vbfloat16mf4_t v0, vbfloat16mf4_t v1, vbfloat16mf4_t v2,
    vbfloat16mf4_t v3, vbfloat16mf4_t v4, vbfloat16mf4_t v5,
    vbfloat16mf4_t v6, vbfloat16mf4_t v7) {
  vbfloat16mf4x8_t tuple =
      __riscv_vcreate_v_bf16mf4x8(v0, v1, v2, v3, v4, v5, v6, v7);
  return tuple;
}

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16mf2x2 and returns
 * the vbfloat16mf2x2_t tuple it produces. */
vbfloat16mf2x2_t test_vcreate_v_bf16mf2x2(vbfloat16mf2_t v0,
                                          vbfloat16mf2_t v1) {
  vbfloat16mf2x2_t tuple = __riscv_vcreate_v_bf16mf2x2(v0, v1);
  return tuple;
}

/* API test: forwards v0..v2 to __riscv_vcreate_v_bf16mf2x3 and returns the
 * vbfloat16mf2x3_t tuple it produces. */
vbfloat16mf2x3_t test_vcreate_v_bf16mf2x3(
    vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2) {
  vbfloat16mf2x3_t tuple = __riscv_vcreate_v_bf16mf2x3(v0, v1, v2);
  return tuple;
}

/* API test: forwards v0..v3 to __riscv_vcreate_v_bf16mf2x4 and returns the
 * vbfloat16mf2x4_t tuple it produces. */
vbfloat16mf2x4_t test_vcreate_v_bf16mf2x4(
    vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2,
    vbfloat16mf2_t v3) {
  vbfloat16mf2x4_t tuple = __riscv_vcreate_v_bf16mf2x4(v0, v1, v2, v3);
  return tuple;
}

/* API test: forwards v0..v4 to __riscv_vcreate_v_bf16mf2x5 and returns the
 * vbfloat16mf2x5_t tuple it produces. */
vbfloat16mf2x5_t test_vcreate_v_bf16mf2x5(
    vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2,
    vbfloat16mf2_t v3, vbfloat16mf2_t v4) {
  vbfloat16mf2x5_t tuple = __riscv_vcreate_v_bf16mf2x5(v0, v1, v2, v3, v4);
  return tuple;
}

/* API test: forwards v0..v5 to __riscv_vcreate_v_bf16mf2x6 and returns the
 * vbfloat16mf2x6_t tuple it produces. */
vbfloat16mf2x6_t test_vcreate_v_bf16mf2x6(
    vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2,
    vbfloat16mf2_t v3, vbfloat16mf2_t v4, vbfloat16mf2_t v5) {
  vbfloat16mf2x6_t tuple =
      __riscv_vcreate_v_bf16mf2x6(v0, v1, v2, v3, v4, v5);
  return tuple;
}

/* API test: forwards v0..v6 to __riscv_vcreate_v_bf16mf2x7 and returns the
 * vbfloat16mf2x7_t tuple it produces. */
vbfloat16mf2x7_t test_vcreate_v_bf16mf2x7(
    vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2,
    vbfloat16mf2_t v3, vbfloat16mf2_t v4, vbfloat16mf2_t v5,
    vbfloat16mf2_t v6) {
  vbfloat16mf2x7_t tuple =
      __riscv_vcreate_v_bf16mf2x7(v0, v1, v2, v3, v4, v5, v6);
  return tuple;
}

/* API test: forwards v0..v7 to __riscv_vcreate_v_bf16mf2x8 and returns the
 * vbfloat16mf2x8_t tuple it produces. */
vbfloat16mf2x8_t test_vcreate_v_bf16mf2x8(
    vbfloat16mf2_t v0, vbfloat16mf2_t v1, vbfloat16mf2_t v2,
    vbfloat16mf2_t v3, vbfloat16mf2_t v4, vbfloat16mf2_t v5,
    vbfloat16mf2_t v6, vbfloat16mf2_t v7) {
  vbfloat16mf2x8_t tuple =
      __riscv_vcreate_v_bf16mf2x8(v0, v1, v2, v3, v4, v5, v6, v7);
  return tuple;
}

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16m1x2 and returns
 * the vbfloat16m1x2_t tuple it produces. */
vbfloat16m1x2_t test_vcreate_v_bf16m1x2(vbfloat16m1_t v0, vbfloat16m1_t v1) {
  vbfloat16m1x2_t tuple = __riscv_vcreate_v_bf16m1x2(v0, v1);
  return tuple;
}

/* API test: forwards v0..v2 to __riscv_vcreate_v_bf16m1x3 and returns the
 * vbfloat16m1x3_t tuple it produces. */
vbfloat16m1x3_t test_vcreate_v_bf16m1x3(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2) {
  vbfloat16m1x3_t tuple = __riscv_vcreate_v_bf16m1x3(v0, v1, v2);
  return tuple;
}

/* API test: forwards v0..v3 to __riscv_vcreate_v_bf16m1x4 and returns the
 * vbfloat16m1x4_t tuple it produces. */
vbfloat16m1x4_t test_vcreate_v_bf16m1x4(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3) {
  vbfloat16m1x4_t tuple = __riscv_vcreate_v_bf16m1x4(v0, v1, v2, v3);
  return tuple;
}

/* API test: forwards v0..v4 to __riscv_vcreate_v_bf16m1x5 and returns the
 * vbfloat16m1x5_t tuple it produces. */
vbfloat16m1x5_t test_vcreate_v_bf16m1x5(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3,
    vbfloat16m1_t v4) {
  vbfloat16m1x5_t tuple = __riscv_vcreate_v_bf16m1x5(v0, v1, v2, v3, v4);
  return tuple;
}

/* API test: forwards v0..v5 to __riscv_vcreate_v_bf16m1x6 and returns the
 * vbfloat16m1x6_t tuple it produces. */
vbfloat16m1x6_t test_vcreate_v_bf16m1x6(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3,
    vbfloat16m1_t v4, vbfloat16m1_t v5) {
  vbfloat16m1x6_t tuple = __riscv_vcreate_v_bf16m1x6(v0, v1, v2, v3, v4, v5);
  return tuple;
}

/* API test: forwards v0..v6 to __riscv_vcreate_v_bf16m1x7 and returns the
 * vbfloat16m1x7_t tuple it produces. */
vbfloat16m1x7_t test_vcreate_v_bf16m1x7(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3,
    vbfloat16m1_t v4, vbfloat16m1_t v5, vbfloat16m1_t v6) {
  vbfloat16m1x7_t tuple =
      __riscv_vcreate_v_bf16m1x7(v0, v1, v2, v3, v4, v5, v6);
  return tuple;
}

/* API test: forwards v0..v7 to __riscv_vcreate_v_bf16m1x8 and returns the
 * vbfloat16m1x8_t tuple it produces. */
vbfloat16m1x8_t test_vcreate_v_bf16m1x8(
    vbfloat16m1_t v0, vbfloat16m1_t v1, vbfloat16m1_t v2, vbfloat16m1_t v3,
    vbfloat16m1_t v4, vbfloat16m1_t v5, vbfloat16m1_t v6, vbfloat16m1_t v7) {
  vbfloat16m1x8_t tuple =
      __riscv_vcreate_v_bf16m1x8(v0, v1, v2, v3, v4, v5, v6, v7);
  return tuple;
}

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16m2x2 and returns
 * the vbfloat16m2x2_t tuple it produces. */
vbfloat16m2x2_t test_vcreate_v_bf16m2x2(vbfloat16m2_t v0, vbfloat16m2_t v1) {
  vbfloat16m2x2_t tuple = __riscv_vcreate_v_bf16m2x2(v0, v1);
  return tuple;
}

/* API test: forwards v0..v2 to __riscv_vcreate_v_bf16m2x3 and returns the
 * vbfloat16m2x3_t tuple it produces. */
vbfloat16m2x3_t test_vcreate_v_bf16m2x3(
    vbfloat16m2_t v0, vbfloat16m2_t v1, vbfloat16m2_t v2) {
  vbfloat16m2x3_t tuple = __riscv_vcreate_v_bf16m2x3(v0, v1, v2);
  return tuple;
}

/* API test: forwards v0..v3 to __riscv_vcreate_v_bf16m2x4 and returns the
 * vbfloat16m2x4_t tuple it produces. */
vbfloat16m2x4_t test_vcreate_v_bf16m2x4(
    vbfloat16m2_t v0, vbfloat16m2_t v1, vbfloat16m2_t v2, vbfloat16m2_t v3) {
  vbfloat16m2x4_t tuple = __riscv_vcreate_v_bf16m2x4(v0, v1, v2, v3);
  return tuple;
}

/* API test: forwards v0 and v1 to __riscv_vcreate_v_bf16m4x2 and returns
 * the vbfloat16m4x2_t tuple it produces. */
vbfloat16m4x2_t test_vcreate_v_bf16m4x2(vbfloat16m4_t v0, vbfloat16m4_t v1) {
  vbfloat16m4x2_t tuple = __riscv_vcreate_v_bf16m4x2(v0, v1);
  return tuple;
}
92 changes: 92 additions & 0 deletions auto-generated/bfloat16/api-testing/vfncvtbf16.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#include <riscv_vector.h>
#include <stdint.h>

/* API test: passes vs2 (vfloat32mf2_t) and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16mf4 and returns its vbfloat16mf4_t result. */
vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4(vfloat32mf2_t vs2, size_t vl) {
  vbfloat16mf4_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16mf4(vs2, vl);
  return narrowed;
}

/* API test: passes vs2 (vfloat32m1_t) and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16mf2 and returns its vbfloat16mf2_t result. */
vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2(vfloat32m1_t vs2, size_t vl) {
  vbfloat16mf2_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16mf2(vs2, vl);
  return narrowed;
}

/* API test: passes vs2 (vfloat32m2_t) and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16m1 and returns its vbfloat16m1_t result. */
vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1(vfloat32m2_t vs2, size_t vl) {
  vbfloat16m1_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16m1(vs2, vl);
  return narrowed;
}

/* API test: passes vs2 (vfloat32m4_t) and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16m2 and returns its vbfloat16m2_t result. */
vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2(vfloat32m4_t vs2, size_t vl) {
  vbfloat16m2_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16m2(vs2, vl);
  return narrowed;
}

/* API test: passes vs2 (vfloat32m8_t) and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16m4 and returns its vbfloat16m4_t result. */
vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4(vfloat32m8_t vs2, size_t vl) {
  vbfloat16m4_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16m4(vs2, vl);
  return narrowed;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16mf4_m and returns its vbfloat16mf4_t result. */
vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_m(
    vbool64_t vm, vfloat32mf2_t vs2, size_t vl) {
  vbfloat16mf4_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16mf4_m(vm, vs2, vl);
  return narrowed;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16mf2_m and returns its vbfloat16mf2_t result. */
vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_m(
    vbool32_t vm, vfloat32m1_t vs2, size_t vl) {
  vbfloat16mf2_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16mf2_m(vm, vs2, vl);
  return narrowed;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16m1_m and returns its vbfloat16m1_t result. */
vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_m(
    vbool16_t vm, vfloat32m2_t vs2, size_t vl) {
  vbfloat16m1_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16m1_m(vm, vs2, vl);
  return narrowed;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16m2_m and returns its vbfloat16m2_t result. */
vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_m(
    vbool8_t vm, vfloat32m4_t vs2, size_t vl) {
  vbfloat16m2_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16m2_m(vm, vs2, vl);
  return narrowed;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfncvtbf16_f_f_w_bf16m4_m and returns its vbfloat16m4_t result. */
vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_m(
    vbool4_t vm, vfloat32m8_t vs2, size_t vl) {
  vbfloat16m4_t narrowed = __riscv_vfncvtbf16_f_f_w_bf16m4_m(vm, vs2, vl);
  return narrowed;
}

/* API test (explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16mf4_rm with vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16mf4_t result. */
vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm(vfloat32mf2_t vs2, size_t vl) {
  vbfloat16mf4_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16mf4_rm(vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16mf2_rm with vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16mf2_t result. */
vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm(vfloat32m1_t vs2, size_t vl) {
  vbfloat16mf2_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16mf2_rm(vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16m1_rm with vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16m1_t result. */
vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm(vfloat32m2_t vs2, size_t vl) {
  vbfloat16m1_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16m1_rm(vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16m2_rm with vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16m2_t result. */
vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm(vfloat32m4_t vs2, size_t vl) {
  vbfloat16m2_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16m2_rm(vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16m4_rm with vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16m4_t result. */
vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm(vfloat32m8_t vs2, size_t vl) {
  vbfloat16m4_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16m4_rm(vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (masked, explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_m with vm, vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16mf4_t result. */
vbfloat16mf4_t test_vfncvtbf16_f_f_w_bf16mf4_rm_m(
    vbool64_t vm, vfloat32mf2_t vs2, size_t vl) {
  vbfloat16mf4_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16mf4_rm_m(vm, vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (masked, explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_m with vm, vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16mf2_t result. */
vbfloat16mf2_t test_vfncvtbf16_f_f_w_bf16mf2_rm_m(
    vbool32_t vm, vfloat32m1_t vs2, size_t vl) {
  vbfloat16mf2_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16mf2_rm_m(vm, vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (masked, explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16m1_rm_m with vm, vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16m1_t result. */
vbfloat16m1_t test_vfncvtbf16_f_f_w_bf16m1_rm_m(
    vbool16_t vm, vfloat32m2_t vs2, size_t vl) {
  vbfloat16m1_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16m1_rm_m(vm, vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (masked, explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16m2_rm_m with vm, vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16m2_t result. */
vbfloat16m2_t test_vfncvtbf16_f_f_w_bf16m2_rm_m(
    vbool8_t vm, vfloat32m4_t vs2, size_t vl) {
  vbfloat16m2_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16m2_rm_m(vm, vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}

/* API test (masked, explicit rounding-mode form): calls
 * __riscv_vfncvtbf16_f_f_w_bf16m4_rm_m with vm, vs2, the constant
 * __RISCV_FRM_RNE, and vl, and returns its vbfloat16m4_t result. */
vbfloat16m4_t test_vfncvtbf16_f_f_w_bf16m4_rm_m(
    vbool4_t vm, vfloat32m8_t vs2, size_t vl) {
  vbfloat16m4_t narrowed =
      __riscv_vfncvtbf16_f_f_w_bf16m4_rm_m(vm, vs2, __RISCV_FRM_RNE, vl);
  return narrowed;
}
47 changes: 47 additions & 0 deletions auto-generated/bfloat16/api-testing/vfwcvtbf16.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include <riscv_vector.h>
#include <stdint.h>

/* API test: passes vs2 (vbfloat16mf4_t) and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32mf2 and returns its vfloat32mf2_t result. */
vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2(vbfloat16mf4_t vs2, size_t vl) {
  vfloat32mf2_t widened = __riscv_vfwcvtbf16_f_f_v_f32mf2(vs2, vl);
  return widened;
}

/* API test: passes vs2 (vbfloat16mf2_t) and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m1 and returns its vfloat32m1_t result. */
vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1(vbfloat16mf2_t vs2, size_t vl) {
  vfloat32m1_t widened = __riscv_vfwcvtbf16_f_f_v_f32m1(vs2, vl);
  return widened;
}

/* API test: passes vs2 (vbfloat16m1_t) and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m2 and returns its vfloat32m2_t result. */
vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2(vbfloat16m1_t vs2, size_t vl) {
  vfloat32m2_t widened = __riscv_vfwcvtbf16_f_f_v_f32m2(vs2, vl);
  return widened;
}

/* API test: passes vs2 (vbfloat16m2_t) and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m4 and returns its vfloat32m4_t result. */
vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4(vbfloat16m2_t vs2, size_t vl) {
  vfloat32m4_t widened = __riscv_vfwcvtbf16_f_f_v_f32m4(vs2, vl);
  return widened;
}

/* API test: passes vs2 (vbfloat16m4_t) and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m8 and returns its vfloat32m8_t result. */
vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8(vbfloat16m4_t vs2, size_t vl) {
  vfloat32m8_t widened = __riscv_vfwcvtbf16_f_f_v_f32m8(vs2, vl);
  return widened;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32mf2_m and returns its vfloat32mf2_t result. */
vfloat32mf2_t test_vfwcvtbf16_f_f_v_f32mf2_m(
    vbool64_t vm, vbfloat16mf4_t vs2, size_t vl) {
  vfloat32mf2_t widened = __riscv_vfwcvtbf16_f_f_v_f32mf2_m(vm, vs2, vl);
  return widened;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m1_m and returns its vfloat32m1_t result. */
vfloat32m1_t test_vfwcvtbf16_f_f_v_f32m1_m(
    vbool32_t vm, vbfloat16mf2_t vs2, size_t vl) {
  vfloat32m1_t widened = __riscv_vfwcvtbf16_f_f_v_f32m1_m(vm, vs2, vl);
  return widened;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m2_m and returns its vfloat32m2_t result. */
vfloat32m2_t test_vfwcvtbf16_f_f_v_f32m2_m(
    vbool16_t vm, vbfloat16m1_t vs2, size_t vl) {
  vfloat32m2_t widened = __riscv_vfwcvtbf16_f_f_v_f32m2_m(vm, vs2, vl);
  return widened;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m4_m and returns its vfloat32m4_t result. */
vfloat32m4_t test_vfwcvtbf16_f_f_v_f32m4_m(
    vbool8_t vm, vbfloat16m2_t vs2, size_t vl) {
  vfloat32m4_t widened = __riscv_vfwcvtbf16_f_f_v_f32m4_m(vm, vs2, vl);
  return widened;
}

/* API test (masked form): passes vm, vs2 and vl to
 * __riscv_vfwcvtbf16_f_f_v_f32m8_m and returns its vfloat32m8_t result. */
vfloat32m8_t test_vfwcvtbf16_f_f_v_f32m8_m(
    vbool4_t vm, vbfloat16m4_t vs2, size_t vl) {
  vfloat32m8_t widened = __riscv_vfwcvtbf16_f_f_v_f32m8_m(vm, vs2, vl);
  return widened;
}
Loading
Loading