@@ -3038,26 +3038,166 @@ RVOP(
3038
3038
(rv)->V[rv_reg_zero][i] = 0; \
3039
3039
}
3040
3040
3041
+ #define VREG_U32_COUNT ((VLEN) >> (5)) /* VLEN / 32; presumably the number of 32-bit words in one vector register -- confirm against VLEN's definition */
3042
+ /*
3043
+ * Vector Configuration-Setting Instructions
3044
+ *
3045
+ * These instructions set the vector CSRs, specifically csr_vl and csr_vtype.
3046
+ * The CSRs can only be updated using vset{i}vl{i} instructions. The current
3047
+ * implementation does not support vma and vta.
3048
+ *
3049
+ * The value VLMAX = (LMUL * VLEN) / SEW represents the maximum number of
3050
+ * elements that can be processed by a single vector instruction given the
3051
+ * current SEW and LMUL.
3052
+ *
3053
+ * Constraints on Setting vl:
3054
+ * - vl = AVL if AVL ≤ VLMAX
3055
+ * - ceil(AVL / 2) ≤ vl ≤ VLMAX if AVL < 2 * VLMAX
3056
+ * - vl = VLMAX if AVL ≥ 2 * VLMAX
3057
+ *
3058
+ * +------------+------+--------------+
3059
+ * | vlmul[2:0] | LMUL | VLMAX |
3060
+ * +------------+------+--------------+
3061
+ * | 1 0 0 | - | - |
3062
+ * | 1 0 1 | 1/8 | VLEN/SEW/8 |
3063
+ * | 1 1 0 | 1/4 | VLEN/SEW/4 |
3064
+ * | 1 1 1 | 1/2 | VLEN/SEW/2 |
3065
+ * | 0 0 0 | 1 | VLEN/SEW |
3066
+ * | 0 0 1 | 2 | 2*VLEN/SEW |
3067
+ * | 0 1 0 | 4 | 4*VLEN/SEW |
3068
+ * | 0 1 1 | 8 | 8*VLEN/SEW |
3069
+ * +------------+------+--------------+
3070
+ *
3071
+ * LMUL determines how vector registers are grouped. Since VL controls the
3072
+ * number of processed elements (based on SEW) and is derived from VLMAX,
3073
+ * LMUL's primary role is setting VLMAX. This implementation computes VLMAX
3074
+ * directly, avoiding fractional LMUL values (e.g., 1/2, 1/4, 1/8).
3075
+ *
3076
+ * Mapping of rd, rs1, and AVL value effects on vl:
3077
+ * +-----+-----+------------------+----------------------------------+
3078
+ * | rd | rs1 | AVL value | Effect on vl |
3079
+ * +-----+-----+------------------+----------------------------------+
3080
+ * | - | !x0 | Value in x[rs1] | Normal stripmining |
3081
+ * | !x0 | x0 | ~0 | Set vl to VLMAX |
3082
+ * | x0 | x0 | Value in vl reg | Keep existing vl |
3083
+ * +-----+-----+------------------+----------------------------------+
3084
+ *
3085
+ * +------------+----------+
3086
+ * | vsew[2:0] | SEW |
3087
+ * +------------+----------+
3088
+ * | 0 0 0 | 8 |
3089
+ * | 0 0 1 | 16 |
3090
+ * | 0 1 0 | 32 |
3091
+ * | 0 1 1 | 64 |
3092
+ * | 1 X X | Reserved |
3093
+ * +------------+----------+
3094
+ */
3095
+
3096
/*
 * vl_setting(vlmax_, rs1, vl): derive the new vector length from a requested
 * application vector length (AVL).
 *
 *   vlmax_ : maximum element count for the current SEW/LMUL
 *   rs1    : requested AVL value
 *   vl     : lvalue that receives the result
 *
 * Result: vl = AVL when AVL <= VLMAX, otherwise vl = VLMAX. Choosing VLMAX
 * for the VLMAX < AVL < 2 * VLMAX range satisfies the
 * ceil(AVL / 2) <= vl <= VLMAX constraint, so that range needs no separate
 * branch.
 *
 * The expansion is a single statement (do/while(0)), so it is safe inside an
 * unbraced if/else. Arguments may be evaluated more than once; do not pass
 * expressions with side effects.
 */
#define vl_setting(vlmax_, rs1, vl)                    \
    do {                                               \
        (vl) = ((rs1) <= (vlmax_)) ? (rs1) : (vlmax_); \
    } while (0)
3104
+
3041
3105
RVOP (
3042
3106
vsetvli ,
3043
- { V_NOP ; },
3107
+ {
3108
+ uint8_t v_lmul = ir -> zimm & 0b111 ;
3109
+ uint8_t v_sew = (ir -> zimm >> 3 ) & 0b111 ;
3110
+
3111
+ if (v_lmul == 4 || v_sew >= 4 ) {
3112
+ /* Illegal setting */
3113
+ rv -> csr_vl = 0 ;
3114
+ rv -> csr_vtype = 0x80000000 ;
3115
+ return true;
3116
+ }
3117
+ uint16_t vlmax = (v_lmul < 4 )
3118
+ ? ((1 << v_lmul ) * VLEN ) >> (3 + v_sew )
3119
+ : (VLEN >> (3 + v_sew ) >> (3 - (v_lmul - 5 )));
3120
+ if (ir -> rs1 ) {
3121
+ vl_setting (vlmax , rv -> X [ir -> rs1 ], rv -> csr_vl );
3122
+ rv -> csr_vtype = ir -> zimm ;
3123
+ } else {
3124
+ if (!ir -> rd ) {
3125
+ rv -> csr_vtype = ir -> zimm ;
3126
+ } else {
3127
+ rv -> csr_vl = vlmax ;
3128
+ rv -> csr_vtype = ir -> zimm ;
3129
+ }
3130
+ }
3131
+ rv -> X [ir -> rd ] = rv -> csr_vl ;
3132
+ },
3044
3133
GEN ({
3045
3134
assert ; /* FIXME: Implement */
3046
3135
}))
3047
3136
3048
3137
RVOP (
3049
3138
vsetivli ,
3050
- { V_NOP ; },
3139
+ {
3140
+ uint8_t v_lmul = ir -> zimm & 0b111 ;
3141
+ uint8_t v_sew = (ir -> zimm >> 3 ) & 0b111 ;
3142
+
3143
+ if (v_lmul == 4 || v_sew >= 4 ) {
3144
+ /* Illegal setting */
3145
+ rv -> csr_vl = 0 ;
3146
+ rv -> csr_vtype = 0x80000000 ;
3147
+ return true;
3148
+ }
3149
+ uint16_t vlmax = (v_lmul < 4 )
3150
+ ? ((1 << v_lmul ) * VLEN ) >> (3 + v_sew )
3151
+ : (VLEN >> (3 + v_sew ) >> (3 - (v_lmul - 5 )));
3152
+ if (ir -> rs1 ) {
3153
+ vl_setting (vlmax , ir -> rs1 , rv -> csr_vl );
3154
+ rv -> csr_vtype = ir -> zimm ;
3155
+ } else {
3156
+ if (!ir -> rd ) {
3157
+ rv -> csr_vtype = ir -> zimm ;
3158
+ } else {
3159
+ rv -> csr_vl = vlmax ;
3160
+ rv -> csr_vtype = ir -> zimm ;
3161
+ }
3162
+ }
3163
+ rv -> X [ir -> rd ] = rv -> csr_vl ;
3164
+ },
3051
3165
GEN ({
3052
3166
assert ; /* FIXME: Implement */
3053
3167
}))
3054
3168
3055
3169
RVOP (
3056
3170
vsetvl ,
3057
- { V_NOP ; },
3171
+ {
3172
+ uint8_t v_lmul = rv -> X [ir -> rs2 ] & 0b111 ;
3173
+ uint8_t v_sew = (rv -> X [ir -> rs2 ] >> 3 ) & 0b111 ;
3174
+
3175
+ if (v_lmul == 4 || v_sew >= 4 ) {
3176
+ /* Illegal setting */
3177
+ rv -> csr_vl = 0 ;
3178
+ rv -> csr_vtype = 0x80000000 ;
3179
+ return true;
3180
+ }
3181
+ uint16_t vlmax = (v_lmul < 4 )
3182
+ ? ((1 << v_lmul ) * VLEN ) >> (3 + v_sew )
3183
+ : (VLEN >> (3 + v_sew ) >> (3 - (v_lmul - 5 )));
3184
+ if (rv -> X [ir -> rs1 ]) {
3185
+ vl_setting (vlmax , rv -> X [ir -> rs1 ], rv -> csr_vl );
3186
+ rv -> csr_vtype = rv -> X [ir -> rs2 ];
3187
+ } else {
3188
+ if (!ir -> rd ) {
3189
+ rv -> csr_vtype = rv -> X [ir -> rs2 ];
3190
+ } else {
3191
+ rv -> csr_vl = vlmax ;
3192
+ rv -> csr_vtype = rv -> X [ir -> rs2 ];
3193
+ }
3194
+ }
3195
+ rv -> X [ir -> rd ] = rv -> csr_vl ;
3196
+ },
3058
3197
GEN ({
3059
3198
assert ; /* FIXME: Implement */
3060
3199
}))
3200
+ #undef vl_setting
3061
3201
3062
3202
RVOP (
3063
3203
vle8_v ,
0 commit comments