diff --git a/doc/rvv-intrinsic-spec.adoc b/doc/rvv-intrinsic-spec.adoc index a87d2b049..dde113ac8 100644 --- a/doc/rvv-intrinsic-spec.adoc +++ b/doc/rvv-intrinsic-spec.adoc @@ -753,3 +753,53 @@ NOTE: Standard extensions are merged into `riscv/riscv-isa-manual` after ratific ^29^Section 3.6 (Vector Byte Length `vlenb`) in the specification ^0^ ^30^Section 16.6 (Whole Vector Register Move) in the specification ^0^ + + +== Vendor extension support +=== Introduction + +The RISC-V XTheadVector C intrinsics provide users interfaces in the C language level to directly leverage https://github.com/T-head-Semi/thead-extension-spec/[XTheadVector specification], with assistance from the compiler in handling instruction scheduling and register allocation. The intrinsics also aim to free users from responsibility of maintaining the correct configuration settings ^18^ for the xtheadvector instruction executions. + +The XTheadVector intrinsics support all RVV intrinsic functions with the restriction of missing support for fractional LMUL values. Additionally, the XTheadVector extension intrinsics API extends the RVV intrinsics API by new load and store functions. + +=== Test macro + +The `__riscv_th_v_intrinsic` macro is the C macro to test the compiler's support for the RISC-V "XTheadVector" extension intrinsics. The API extension is optional and targeting the vendor extension XTheadVector. + +The value of the test macro is defined as its version, which is computed using the following formula. The formula is identical to what is defined in the RISC-V C API specification ^1^ . + +---- + * 1,000,000 + * 1,000 + +---- + +For example, the v1.0 version should define the macro with value `1000000`. + +=== Availability + +With `` included, availability of intrinsic variants depends on the required architecture of their corresponding xtheadvector instructions. The supported architecture is specified to the compiler using the `-march` option ^2,3^. + +We suggest guarding the inclusion with the test macro. + +[,c] +---- +#if defined(__riscv_vector) && defined(__riscv_v_intrinsics) +#include +#elif defined (__riscv_xtheadvector) && defined (__riscv_th_v_intrinsics) +#include +#else +#error This file requires either RVV intrinsics or XTheadVector intrinsics +#endif +---- + +=== Example +[,c] +---- +void function (void * in, void *out) +{ + vint32m1_t v = __riscv_th_vlb_v_i32m1 (in, 4); + vint32m1_t v2 = __riscv_th_vlb_v_i32m1_tu (v, in, 4); + vint32m1_t v3 = __riscv_vadd_vv_i32m1 (v2, v2, 4); + vint32m1_t v4 = __riscv_vadd_vv_i32m1_tu (v3, v2, v2, 4); + __riscv_th_vsb_v_i32m1 (out, v4, 4); +} +---- diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/constants.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/constants.py index e2ae21964..3a9903977 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/constants.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/constants.py @@ -19,15 +19,20 @@ """ LMULS = ["f8", "f4", "f2", 1, 2, 4, 8] +ILMULS = [1, 2, 4, 8] WLMULS = ["f8", "f4", "f2", 1, 2, 4] NCVTLMULS = ["f4", "f2", 1, 2, 4, 8] SEWS = [8, 16, 32, 64] WSEWS = [8, 16, 32] FSEWS = [16, 32, 64] +FFSEWS = [32, 64] WFSEWS = [16, 32] +WFFSEWS = [32] NSEWS = [16, 32, 64] TYPES = ["float", "int", "uint"] ITYPES = ["int", "uint"] +SITYPES = ["int"] +UITYPES = ["uint"] FTYPES = ["float"] MTYPES = ["bool"] MLENS = [1, 2, 4, 8, 16, 32, 64] diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/inst.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/inst.py index 759e6c6b4..1f6ccd506 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/inst.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/inst.py @@ -42,8 +42,8 @@ from templates import mask_template from templates import mask_load_store_template from templates import permute_template -from constants import LMULS,WLMULS,NCVTLMULS,SEWS,WSEWS,FSEWS,WFSEWS,NSEWS,\ - TYPES,ITYPES,FTYPES,MTYPES,MLENS +from constants import LMULS,ILMULS,WLMULS,NCVTLMULS,SEWS,WSEWS,FSEWS,FFSEWS,WFSEWS,\ + WFFSEWS,NSEWS,TYPES,ITYPES,SITYPES,UITYPES,FTYPES,MTYPES,MLENS from generator import CompatibleHeaderGenerator @@ -511,5 +511,90 @@ def gen(g): "vector-creation", ["vcreate"], TYPES, SEWS, LMULS, decorators.has_no_masking) + #################################################################### + g.start_group("Additional Intrinsic Functions for XTheadVector") + + g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics", + "xtheadvector-unit-stride-load", ["th_vlb"], + SITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics", + "xtheadvector-unit-stride-load", ["th_vlh"], + SITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics", + "xtheadvector-unit-stride-load", ["th_vlw"], + SITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics", + "xtheadvector-unit-stride-load", ["th_vlbu"], + UITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics", + "xtheadvector-unit-stride-load", ["th_vlhu"], + UITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics", + "xtheadvector-unit-stride-load", ["th_vlwu"], + UITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(store_template, "XTheadVector Unit-Stride Store Intrinsics", + "xtheadvector-unit-stride-store", ["th_vsb"], + ITYPES, WSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(store_template, "XTheadVector Unit-Stride Store Intrinsics", + "xtheadvector-unit-stride-store", ["th_vsh"], + ITYPES, WFSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(store_template, "XTheadVector Unit-Stride Store Intrinsics", + "xtheadvector-unit-stride-store", ["th_vsw"], + ITYPES, WFFSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(load_template, "XTheadVector Strided Load Intrinsics", + "xtheadvector-strided-load", ["th_vlsb"], + SITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Strided Load Intrinsics", + "xtheadvector-strided-load", ["th_vlsh"], + SITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Strided Load Intrinsics", + "xtheadvector-strided-load", ["th_vlsw"], + SITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Strided Load Intrinsics", + "xtheadvector-strided-load", ["th_vlsbu"], + UITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Strided Load Intrinsics", + "xtheadvector-strided-load", ["th_vlshu"], + UITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Strided Load Intrinsics", + "xtheadvector-strided-load", ["th_vlswu"], + UITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(store_template, "XTheadVector Strided Store Intrinsics", + "xtheadvector-strided-store", ["th_vssb"], + ITYPES, WSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(store_template, "XTheadVector Strided Store Intrinsics", + "xtheadvector-strided-store", ["th_vssh"], + ITYPES, WFSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(store_template, "XTheadVector Strided Store Intrinsics", + "xtheadvector-strided-store", ["th_vssw"], + ITYPES, WFFSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(load_template, "XTheadVector Indexed Load Intrinsics", + "xtheadvector-indexed-load", ["th_vlxb"], + SITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Indexed Load Intrinsics", + "xtheadvector-indexed-load", ["th_vlxh"], + SITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Indexed Load Intrinsics", + "xtheadvector-indexed-load", ["th_vlxw"], + SITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Indexed Load Intrinsics", + "xtheadvector-indexed-load", ["th_vlxbu"], + UITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Indexed Load Intrinsics", + "xtheadvector-indexed-load", ["th_vlxhu"], + UITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(load_template, "XTheadVector Indexed Load Intrinsics", + "xtheadvector-indexed-load", ["th_vlxwu"], + UITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy) + g.function_group(store_template, "XTheadVector Indexed Store Intrinsics", + "xtheadvector-indexed-store", ["th_vsxb", "th_vsuxb"], + ITYPES, WSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(store_template, "XTheadVector Indexed Store Intrinsics", + "xtheadvector-indexed-store", ["th_vsxh", "th_vsuxh"], + ITYPES, WFSEWS, ILMULS, decorators.has_masking_no_maskedoff) + g.function_group(store_template, "XTheadVector Indexed Store Intrinsics", + "xtheadvector-indexed-store", ["th_vsxw", "th_vsuxw"], + ITYPES, WFFSEWS, ILMULS, decorators.has_masking_no_maskedoff) + #################################################################### g.gen_prologue() diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/load_template.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/load_template.py index 4d2529a6d..efd4d2b05 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/load_template.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/load_template.py @@ -49,6 +49,9 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): if op in ["vlse"]: extra_addr_args["rs2"] = "ptrdiff_t" inst_type = InstType.VXX + if op in ["th_vlsb", "th_vlsh", "th_vlsw", "th_vlsbu", "th_vlshu", "th_vlswu"]: + extra_addr_args["rs2"] = "size_t" + inst_type = InstType.VXX if op in ["vloxei", "vluxei"]: elmul = type_helper.get_elmul(eew, sew) if elmul == 0: @@ -56,11 +59,20 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): elmul_str = get_string_lmul(elmul, 1) extra_addr_args["rs2"] = f"vuint{eew}m{elmul_str}_t" inst_type = InstType.VV + if op in ["th_vlxb", "th_vlxh", "th_vlxw", "th_vlxbu", "th_vlxhu", "th_vlxwu"]: + elmul = type_helper.get_elmul(eew, sew) + if elmul == 0: + continue + elmul_str = get_string_lmul(elmul, 1) + extra_addr_args["indexed"] = f"vuint{eew}m{elmul_str}_t" + inst_type = InstType.VV if op == "vleff": extra_attr |= ExtraAttr.FIRST_FAULT args["OP"] = "vle" + str(eew) + "ff" extra_addr_args["new_vl"] = "size_t *" + elif (("b" in op) or ("h" in op) or ("w" in op)): + args["OP"] = op else: args["OP"] = op + str(eew) @@ -68,15 +80,27 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): continue inst_info =\ InstInfo.get(args, decorator, inst_type, MemType.LOAD, extra_attr) - G.func( - inst_info, - name=\ - "{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + decorator.func_suffix, - return_type=type_helper.v, - **decorator.mask_args(type_helper.m, type_helper.v), - **decorator.tu_dest_args(type_helper.v), - rs1=f"const {type_helper.s} *", - **extra_addr_args, - vl=type_helper.size_t) + if (("b" in op) or ("h" in op) or ("w" in op)): + G.func( + inst_info, + name=\ + "{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + decorator.func_suffix, + return_type=type_helper.v, + **decorator.mask_args(type_helper.m, type_helper.v), + **decorator.tu_dest_args(type_helper.v), + a=f"const {type_helper.s} *", + **extra_addr_args, + vl=type_helper.size_t) + else: + G.func( + inst_info, + name=\ + "{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + decorator.func_suffix, + return_type=type_helper.v, + **decorator.mask_args(type_helper.m, type_helper.v), + **decorator.tu_dest_args(type_helper.v), + rs1=f"const {type_helper.s} *", + **extra_addr_args, + vl=type_helper.size_t) G.inst_group_epilogue() diff --git a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/store_template.py b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/store_template.py index 98476e6ba..ba0219fc5 100644 --- a/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/store_template.py +++ b/rvv-intrinsic-generator/rvv_intrinsic_gen/templates/store_template.py @@ -47,6 +47,9 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): if op in ["vsse"]: extra_addr_args["rs2"] = "ptrdiff_t" inst_type = InstType.VXX + if op in ["th_vssb", "th_vssh", "th_vssw"]: + extra_addr_args["rs2"] = "size_t" + inst_type = InstType.VXX if op in ["vsoxei", "vsuxei"]: elmul = type_helper.get_elmul(eew, sew) if elmul == 0: @@ -54,21 +57,56 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list): elmul_str = get_string_lmul(elmul, 1) extra_addr_args["rs2"] = f"vuint{eew}m{elmul_str}_t" inst_type = InstType.VV - args["OP"] = op + str(eew) + if op in ["th_vsxb", "th_vsxh", "th_vsxw", "th_vsuxb", "th_vsuxh", "th_vsuxw"]: + elmul = type_helper.get_elmul(eew, sew) + if elmul == 0: + continue + elmul_str = get_string_lmul(elmul, 1) + extra_addr_args["indexed"] = f"vuint{eew}m{elmul_str}_t" + inst_type = InstType.VV + + if (("b" in op) or ("h" in op) or ("w" in op)): + args["OP"] = op + else: + args["OP"] = op + str(eew) if op not in ["vsoxei", "vsuxei"] and sew != eew: continue inst_info = InstInfo.get(args, decorator, inst_type, MemType.STORE) - G.func( - inst_info, - name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + - decorator.func_suffix, - return_type=type_helper.void, - **decorator.mask_args(type_helper.m, type_helper.v), - rs1=f"{type_helper.s} *", - **extra_addr_args, - vs3=type_helper.v, - vl=type_helper.size_t) + if (("b" in op) or ("h" in op) or ("w" in op)): + if ("x" in op): + G.func( + inst_info, + name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + + decorator.func_suffix, + return_type=type_helper.void, + **decorator.mask_args(type_helper.m, type_helper.v), + a=f"{type_helper.s} *", + **extra_addr_args, + b=type_helper.v, + vl=type_helper.size_t) + else: + G.func( + inst_info, + name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + + decorator.func_suffix, + return_type=type_helper.void, + **decorator.mask_args(type_helper.m, type_helper.v), + a=f"{type_helper.s} *", + **extra_addr_args, + vs3=type_helper.v, + vl=type_helper.size_t) + else: + G.func( + inst_info, + name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + + decorator.func_suffix, + return_type=type_helper.void, + **decorator.mask_args(type_helper.m, type_helper.v), + rs1=f"{type_helper.s} *", + **extra_addr_args, + vs3=type_helper.v, + vl=type_helper.size_t) G.inst_group_epilogue()