Skip to content

Commit c4725d5

Browse files
committed
Implement vector configuration instructions
Add support for the vset{i}vl{i} instructions, following version 1.0 of the RISC-V vector extension. Simplify the VLMAX calculation by computing VLMAX = LMUL * VLEN / SEW directly with integer arithmetic instead of converting to floating-point as described in the specification.
1 parent fbcfad1 commit c4725d5

File tree

1 file changed

+143
-3
lines changed

1 file changed

+143
-3
lines changed

src/rv32_template.c

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3038,26 +3038,166 @@ RVOP(
30383038
(rv)->V[rv_reg_zero][i] = 0; \
30393039
}
30403040

3041+
/* VLEN divided by 32 (shift right by 5).
 * NOTE(review): presumably the number of 32-bit words backing one vector
 * register, assuming VLEN is the register width in bits - confirm.
 */
#define VREG_U32_COUNT ((VLEN) >> (5))
3042+
/*
3043+
* Vector Configuration-Setting Instructions
3044+
*
3045+
* These instructions set the vector CSRs, specifically csr_vl and csr_vtype.
3046+
* The CSRs can only be updated using vset{i}vl{i} instructions. The current
3047+
* implementation does not support vma and vta.
3048+
*
3049+
* The value VLMAX = (LMUL * VLEN) / SEW represents the maximum number of
3050+
* elements that can be processed by a single vector instruction given the
3051+
* current SEW and LMUL.
3052+
*
3053+
* Constraints on Setting vl:
3054+
* - vl = AVL if AVL ≤ VLMAX
3055+
* - ceil(AVL / 2) ≤ vl ≤ VLMAX if AVL < 2 * VLMAX
3056+
* - vl = VLMAX if AVL ≥ 2 * VLMAX
3057+
*
3058+
* +------------+------+--------------+
3059+
* | vlmul[2:0] | LMUL | VLMAX |
3060+
* +------------+------+--------------+
3061+
* | 1 0 0 | - | - |
3062+
* | 1 0 1 | 1/8 | VLEN/SEW/8 |
3063+
* | 1 1 0 | 1/4 | VLEN/SEW/4 |
3064+
* | 1 1 1 | 1/2 | VLEN/SEW/2 |
3065+
* | 0 0 0 | 1 | VLEN/SEW |
3066+
* | 0 0 1 | 2 | 2*VLEN/SEW |
3067+
* | 0 1 0 | 4 | 4*VLEN/SEW |
3068+
* | 0 1 1 | 8 | 8*VLEN/SEW |
3069+
* +------------+------+--------------+
3070+
*
3071+
* LMUL determines how vector registers are grouped. Since VL controls the
3072+
* number of processed elements (based on SEW) and is derived from VLMAX,
3073+
* LMUL's primary role is setting VLMAX. This implementation computes VLMAX
3074+
* directly, avoiding fractional LMUL values (e.g., 1/2, 1/4, 1/8).
3075+
*
3076+
* Mapping of rd, rs1, and AVL value effects on vl:
3077+
* +-----+-----+------------------+----------------------------------+
3078+
* | rd | rs1 | AVL value | Effect on vl |
3079+
* +-----+-----+------------------+----------------------------------+
3080+
* | - | !x0 | Value in x[rs1] | Normal stripmining |
3081+
* | !x0 | x0 | ~0 | Set vl to VLMAX |
3082+
* | x0 | x0 | Value in vl reg | Keep existing vl |
3083+
* +-----+-----+------------------+----------------------------------+
3084+
*
3085+
* +------------+----------+
3086+
* | vsew[2:0] | SEW |
3087+
* +------------+----------+
3088+
* | 0 0 0 | 8 |
3089+
* | 0 0 1 | 16 |
3090+
* | 0 1 0 | 32 |
3091+
* | 0 1 1 | 64 |
3092+
* | 1 X X | Reserved |
3093+
* +------------+----------+
3094+
*/
3095+
3096+
/*
 * vl_setting - derive the new vector length from a requested AVL.
 *
 * vlmax_: maximum element count for the current SEW/LMUL (VLMAX)
 * rs1:    requested application vector length (AVL)
 * vl:     lvalue that receives the resulting vector length
 *
 * The result is min(AVL, VLMAX). This satisfies all three constraints listed
 * in the comment above: vl = AVL when AVL <= VLMAX and vl = VLMAX otherwise
 * (the "AVL < 2 * VLMAX" and "AVL >= 2 * VLMAX" cases pick the same value).
 *
 * Every argument is parenthesized so expression arguments parse as intended.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, including inside an unbraced if/else. Note that 'rs1' and
 * 'vlmax_' are evaluated more than once; do not pass expressions with side
 * effects.
 */
#define vl_setting(vlmax_, rs1, vl)                    \
    do {                                               \
        (vl) = ((rs1) <= (vlmax_)) ? (rs1) : (vlmax_); \
    } while (0)
3104+
30413105
RVOP(
30423106
vsetvli,
3043-
{ V_NOP; },
3107+
{
3108+
uint8_t v_lmul = ir->zimm & 0b111;
3109+
uint8_t v_sew = (ir->zimm >> 3) & 0b111;
3110+
3111+
if (v_lmul == 4 || v_sew >= 4) {
3112+
/* Illegal setting */
3113+
rv->csr_vl = 0;
3114+
rv->csr_vtype = 0x80000000;
3115+
return true;
3116+
}
3117+
uint16_t vlmax = (v_lmul < 4)
3118+
? ((1 << v_lmul) * VLEN) >> (3 + v_sew)
3119+
: (VLEN >> (3 + v_sew) >> (3 - (v_lmul - 5)));
3120+
if (ir->rs1) {
3121+
vl_setting(vlmax, rv->X[ir->rs1], rv->csr_vl);
3122+
rv->csr_vtype = ir->zimm;
3123+
} else {
3124+
if (!ir->rd) {
3125+
rv->csr_vtype = ir->zimm;
3126+
} else {
3127+
rv->csr_vl = vlmax;
3128+
rv->csr_vtype = ir->zimm;
3129+
}
3130+
}
3131+
rv->X[ir->rd] = rv->csr_vl;
3132+
},
30443133
GEN({
30453134
assert; /* FIXME: Implement */
30463135
}))
30473136

30483137
RVOP(
30493138
vsetivli,
3050-
{ V_NOP; },
3139+
{
3140+
uint8_t v_lmul = ir->zimm & 0b111;
3141+
uint8_t v_sew = (ir->zimm >> 3) & 0b111;
3142+
3143+
if (v_lmul == 4 || v_sew >= 4) {
3144+
/* Illegal setting */
3145+
rv->csr_vl = 0;
3146+
rv->csr_vtype = 0x80000000;
3147+
return true;
3148+
}
3149+
uint16_t vlmax = (v_lmul < 4)
3150+
? ((1 << v_lmul) * VLEN) >> (3 + v_sew)
3151+
: (VLEN >> (3 + v_sew) >> (3 - (v_lmul - 5)));
3152+
if (ir->rs1) {
3153+
vl_setting(vlmax, ir->rs1, rv->csr_vl);
3154+
rv->csr_vtype = ir->zimm;
3155+
} else {
3156+
if (!ir->rd) {
3157+
rv->csr_vtype = ir->zimm;
3158+
} else {
3159+
rv->csr_vl = vlmax;
3160+
rv->csr_vtype = ir->zimm;
3161+
}
3162+
}
3163+
rv->X[ir->rd] = rv->csr_vl;
3164+
},
30513165
GEN({
30523166
assert; /* FIXME: Implement */
30533167
}))
30543168

30553169
RVOP(
30563170
vsetvl,
3057-
{ V_NOP; },
3171+
{
3172+
uint8_t v_lmul = rv->X[ir->rs2] & 0b111;
3173+
uint8_t v_sew = (rv->X[ir->rs2] >> 3) & 0b111;
3174+
3175+
if (v_lmul == 4 || v_sew >= 4) {
3176+
/* Illegal setting */
3177+
rv->csr_vl = 0;
3178+
rv->csr_vtype = 0x80000000;
3179+
return true;
3180+
}
3181+
uint16_t vlmax = (v_lmul < 4)
3182+
? ((1 << v_lmul) * VLEN) >> (3 + v_sew)
3183+
: (VLEN >> (3 + v_sew) >> (3 - (v_lmul - 5)));
3184+
if (rv->X[ir->rs1]) {
3185+
vl_setting(vlmax, rv->X[ir->rs1], rv->csr_vl);
3186+
rv->csr_vtype = rv->X[ir->rs2];
3187+
} else {
3188+
if (!ir->rd) {
3189+
rv->csr_vtype = rv->X[ir->rs2];
3190+
} else {
3191+
rv->csr_vl = vlmax;
3192+
rv->csr_vtype = rv->X[ir->rs2];
3193+
}
3194+
}
3195+
rv->X[ir->rd] = rv->csr_vl;
3196+
},
30583197
GEN({
30593198
assert; /* FIXME: Implement */
30603199
}))
3200+
#undef vl_setting
30613201

30623202
RVOP(
30633203
vle8_v,

0 commit comments

Comments
 (0)