Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add intrinsics for xtheadvector extension #298

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions doc/rvv-intrinsic-spec.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -753,3 +753,53 @@ NOTE: Standard extensions are merged into `riscv/riscv-isa-manual` after ratific
^29^Section 3.6 (Vector Byte Length `vlenb`) in the specification ^0^

^30^Section 16.6 (Whole Vector Register Move) in the specification ^0^


== Vendor extension support
=== Introduction

The RISC-V XTheadVector C intrinsics provide users interfaces in the C language level to directly leverage https://github.com/T-head-Semi/thead-extension-spec/[XTheadVector specification], with assistance from the compiler in handling instruction scheduling and register allocation. The intrinsics also aim to free users from responsibility of maintaining the correct configuration settings ^18^ for the xtheadvector instruction executions.

The XTheadVector intrinsics support all RVV intrinsic functions with the restriction of missing support for fractional LMUL values. Additionally, the XTheadVector extension intrinsics API extends the RVV intrinsics API by new load and store functions.

=== Test macro

The `__riscv_th_v_intrinsic` macro is the C macro to test the compiler's support for the RISC-V "XTheadVector" extension intrinsics. The API extension is optional and targeting the vendor extension XTheadVector.

The value of the test macro is defined as its version, which is computed using the following formula. The formula is identical to what is defined in the RISC-V C API specification ^1^ .

----
<MAJOR_VERSION> * 1,000,000 + <MINOR_VERSION> * 1,000 + <REVISION_VERSION>
----

For example, the v1.0 version should define the macro with value `1000000`.

=== Availability

With `<riscv_th_vector.h>` included, availability of intrinsic variants depends on the required architecture of their corresponding xtheadvector instructions. The supported architecture is specified to the compiler using the `-march` option ^2,3^.

We suggest guarding the inclusion with the test macro.

[,c]
----
#if defined(__riscv_vector) && defined(__riscv_v_intrinsics)
#include <riscv_vector.h>
#elif defined (__riscv_xtheadvector) && defined (__riscv_th_v_intrinsics)
#include <riscv_th_vector.h>
Comment on lines +785 to +788
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible typo here: __riscv_th_v_intrinsics should be __riscv_th_v_intrinsic.
Same for __riscv_v_intrinsics, which should be __riscv_v_intrinsic

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct, thanks for reporting!

#else
#error This file requires either RVV intrinsics or XTheadVector intrinsics
#endif
----

=== Example
[,c]
----
void function (void * in, void *out)
{
vint32m1_t v = __riscv_th_vlb_v_i32m1 (in, 4);
vint32m1_t v2 = __riscv_th_vlb_v_i32m1_tu (v, in, 4);
vint32m1_t v3 = __riscv_vadd_vv_i32m1 (v2, v2, 4);
vint32m1_t v4 = __riscv_vadd_vv_i32m1_tu (v3, v2, v2, 4);
__riscv_th_vsb_v_i32m1 (out, v4, 4);
}
----
5 changes: 5 additions & 0 deletions rvv-intrinsic-generator/rvv_intrinsic_gen/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,20 @@
"""

LMULS = ["f8", "f4", "f2", 1, 2, 4, 8]
ILMULS = [1, 2, 4, 8]
WLMULS = ["f8", "f4", "f2", 1, 2, 4]
NCVTLMULS = ["f4", "f2", 1, 2, 4, 8]
SEWS = [8, 16, 32, 64]
WSEWS = [8, 16, 32]
FSEWS = [16, 32, 64]
FFSEWS = [32, 64]
WFSEWS = [16, 32]
WFFSEWS = [32]
NSEWS = [16, 32, 64]
TYPES = ["float", "int", "uint"]
ITYPES = ["int", "uint"]
SITYPES = ["int"]
UITYPES = ["uint"]
FTYPES = ["float"]
MTYPES = ["bool"]
MLENS = [1, 2, 4, 8, 16, 32, 64]
89 changes: 87 additions & 2 deletions rvv-intrinsic-generator/rvv_intrinsic_gen/inst.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
from templates import mask_template
from templates import mask_load_store_template
from templates import permute_template
from constants import LMULS,WLMULS,NCVTLMULS,SEWS,WSEWS,FSEWS,WFSEWS,NSEWS,\
TYPES,ITYPES,FTYPES,MTYPES,MLENS
from constants import LMULS,ILMULS,WLMULS,NCVTLMULS,SEWS,WSEWS,FSEWS,FFSEWS,WFSEWS,\
WFFSEWS,NSEWS,TYPES,ITYPES,SITYPES,UITYPES,FTYPES,MTYPES,MLENS
from generator import CompatibleHeaderGenerator


Expand Down Expand Up @@ -511,5 +511,90 @@ def gen(g):
"vector-creation", ["vcreate"], TYPES, SEWS, LMULS,
decorators.has_no_masking)

####################################################################
g.start_group("Additional Intrinsic Functions for XTheadVector")

g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics",
"xtheadvector-unit-stride-load", ["th_vlb"],
SITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics",
"xtheadvector-unit-stride-load", ["th_vlh"],
SITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics",
"xtheadvector-unit-stride-load", ["th_vlw"],
SITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics",
"xtheadvector-unit-stride-load", ["th_vlbu"],
UITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics",
"xtheadvector-unit-stride-load", ["th_vlhu"],
UITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Unit-Stride Load Intrinsics",
"xtheadvector-unit-stride-load", ["th_vlwu"],
UITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(store_template, "XTheadVector Unit-Stride Store Intrinsics",
"xtheadvector-unit-stride-store", ["th_vsb"],
ITYPES, WSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(store_template, "XTheadVector Unit-Stride Store Intrinsics",
"xtheadvector-unit-stride-store", ["th_vsh"],
ITYPES, WFSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(store_template, "XTheadVector Unit-Stride Store Intrinsics",
"xtheadvector-unit-stride-store", ["th_vsw"],
ITYPES, WFFSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(load_template, "XTheadVector Strided Load Intrinsics",
"xtheadvector-strided-load", ["th_vlsb"],
SITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Strided Load Intrinsics",
"xtheadvector-strided-load", ["th_vlsh"],
SITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Strided Load Intrinsics",
"xtheadvector-strided-load", ["th_vlsw"],
SITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Strided Load Intrinsics",
"xtheadvector-strided-load", ["th_vlsbu"],
UITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Strided Load Intrinsics",
"xtheadvector-strided-load", ["th_vlshu"],
UITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Strided Load Intrinsics",
"xtheadvector-strided-load", ["th_vlswu"],
UITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(store_template, "XTheadVector Strided Store Intrinsics",
"xtheadvector-strided-store", ["th_vssb"],
ITYPES, WSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(store_template, "XTheadVector Strided Store Intrinsics",
"xtheadvector-strided-store", ["th_vssh"],
ITYPES, WFSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(store_template, "XTheadVector Strided Store Intrinsics",
"xtheadvector-strided-store", ["th_vssw"],
ITYPES, WFFSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(load_template, "XTheadVector Indexed Load Intrinsics",
"xtheadvector-indexed-load", ["th_vlxb"],
SITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Indexed Load Intrinsics",
"xtheadvector-indexed-load", ["th_vlxh"],
SITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Indexed Load Intrinsics",
"xtheadvector-indexed-load", ["th_vlxw"],
SITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Indexed Load Intrinsics",
"xtheadvector-indexed-load", ["th_vlxbu"],
UITYPES, SEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Indexed Load Intrinsics",
"xtheadvector-indexed-load", ["th_vlxhu"],
UITYPES, FSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(load_template, "XTheadVector Indexed Load Intrinsics",
"xtheadvector-indexed-load", ["th_vlxwu"],
UITYPES, FFSEWS, ILMULS, decorators.has_masking_maskedoff_policy)
g.function_group(store_template, "XTheadVector Indexed Store Intrinsics",
"xtheadvector-indexed-store", ["th_vsxb", "th_vsuxb"],
ITYPES, WSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(store_template, "XTheadVector Indexed Store Intrinsics",
"xtheadvector-indexed-store", ["th_vsxh", "th_vsuxh"],
ITYPES, WFSEWS, ILMULS, decorators.has_masking_no_maskedoff)
g.function_group(store_template, "XTheadVector Indexed Store Intrinsics",
"xtheadvector-indexed-store", ["th_vsxw", "th_vsuxw"],
ITYPES, WFFSEWS, ILMULS, decorators.has_masking_no_maskedoff)

####################################################################
g.gen_prologue()
Original file line number Diff line number Diff line change
Expand Up @@ -49,34 +49,58 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list):
if op in ["vlse"]:
extra_addr_args["rs2"] = "ptrdiff_t"
inst_type = InstType.VXX
if op in ["th_vlsb", "th_vlsh", "th_vlsw", "th_vlsbu", "th_vlshu", "th_vlswu"]:
extra_addr_args["rs2"] = "size_t"
inst_type = InstType.VXX
if op in ["vloxei", "vluxei"]:
elmul = type_helper.get_elmul(eew, sew)
if elmul == 0:
continue
elmul_str = get_string_lmul(elmul, 1)
extra_addr_args["rs2"] = f"vuint{eew}m{elmul_str}_t"
inst_type = InstType.VV
if op in ["th_vlxb", "th_vlxh", "th_vlxw", "th_vlxbu", "th_vlxhu", "th_vlxwu"]:
elmul = type_helper.get_elmul(eew, sew)
if elmul == 0:
continue
elmul_str = get_string_lmul(elmul, 1)
extra_addr_args["indexed"] = f"vuint{eew}m{elmul_str}_t"
inst_type = InstType.VV

if op == "vleff":
extra_attr |= ExtraAttr.FIRST_FAULT
args["OP"] = "vle" + str(eew) + "ff"
extra_addr_args["new_vl"] = "size_t *"
elif (("b" in op) or ("h" in op) or ("w" in op)):
args["OP"] = op
else:
args["OP"] = op + str(eew)

if op not in ["vloxei", "vluxei"] and sew != eew:
continue
inst_info =\
InstInfo.get(args, decorator, inst_type, MemType.LOAD, extra_attr)
G.func(
inst_info,
name=\
"{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + decorator.func_suffix,
return_type=type_helper.v,
**decorator.mask_args(type_helper.m, type_helper.v),
**decorator.tu_dest_args(type_helper.v),
rs1=f"const {type_helper.s} *",
**extra_addr_args,
vl=type_helper.size_t)
if (("b" in op) or ("h" in op) or ("w" in op)):
G.func(
inst_info,
name=\
"{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + decorator.func_suffix,
return_type=type_helper.v,
**decorator.mask_args(type_helper.m, type_helper.v),
**decorator.tu_dest_args(type_helper.v),
a=f"const {type_helper.s} *",
**extra_addr_args,
vl=type_helper.size_t)
else:
G.func(
inst_info,
name=\
"{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) + decorator.func_suffix,
return_type=type_helper.v,
**decorator.mask_args(type_helper.m, type_helper.v),
**decorator.tu_dest_args(type_helper.v),
rs1=f"const {type_helper.s} *",
**extra_addr_args,
vl=type_helper.size_t)

G.inst_group_epilogue()
Original file line number Diff line number Diff line change
Expand Up @@ -47,28 +47,66 @@ def render(G, op_list, type_list, sew_list, lmul_list, decorator_list):
if op in ["vsse"]:
extra_addr_args["rs2"] = "ptrdiff_t"
inst_type = InstType.VXX
if op in ["th_vssb", "th_vssh", "th_vssw"]:
extra_addr_args["rs2"] = "size_t"
inst_type = InstType.VXX
if op in ["vsoxei", "vsuxei"]:
elmul = type_helper.get_elmul(eew, sew)
if elmul == 0:
continue
elmul_str = get_string_lmul(elmul, 1)
extra_addr_args["rs2"] = f"vuint{eew}m{elmul_str}_t"
inst_type = InstType.VV
args["OP"] = op + str(eew)
if op in ["th_vsxb", "th_vsxh", "th_vsxw", "th_vsuxb", "th_vsuxh", "th_vsuxw"]:
elmul = type_helper.get_elmul(eew, sew)
if elmul == 0:
continue
elmul_str = get_string_lmul(elmul, 1)
extra_addr_args["indexed"] = f"vuint{eew}m{elmul_str}_t"
inst_type = InstType.VV

if (("b" in op) or ("h" in op) or ("w" in op)):
args["OP"] = op
else:
args["OP"] = op + str(eew)

if op not in ["vsoxei", "vsuxei"] and sew != eew:
continue

inst_info = InstInfo.get(args, decorator, inst_type, MemType.STORE)
G.func(
inst_info,
name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) +
decorator.func_suffix,
return_type=type_helper.void,
**decorator.mask_args(type_helper.m, type_helper.v),
rs1=f"{type_helper.s} *",
**extra_addr_args,
vs3=type_helper.v,
vl=type_helper.size_t)
if (("b" in op) or ("h" in op) or ("w" in op)):
if ("x" in op):
G.func(
inst_info,
name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) +
decorator.func_suffix,
return_type=type_helper.void,
**decorator.mask_args(type_helper.m, type_helper.v),
a=f"{type_helper.s} *",
**extra_addr_args,
b=type_helper.v,
vl=type_helper.size_t)
else:
G.func(
inst_info,
name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) +
decorator.func_suffix,
return_type=type_helper.void,
**decorator.mask_args(type_helper.m, type_helper.v),
a=f"{type_helper.s} *",
**extra_addr_args,
vs3=type_helper.v,
vl=type_helper.size_t)
else:
G.func(
inst_info,
name="{OP}_v_{TYPE}{SEW}m{LMUL}".format_map(args) +
decorator.func_suffix,
return_type=type_helper.void,
**decorator.mask_args(type_helper.m, type_helper.v),
rs1=f"{type_helper.s} *",
**extra_addr_args,
vs3=type_helper.v,
vl=type_helper.size_t)

G.inst_group_epilogue()