@@ -129,14 +129,31 @@ union sui64_fp64 {
129
129
130
130
// Knuth's branch-free two-sum on vectors: S receives the rounded sum X + Y and
// s receives the rounding error of that addition, computed as
//   (X - X_hat) + (Y - (S - X_hat))   with   X_hat = S - Y.
// Results are built in prefixed temporaries and stored only at the end, so the
// output arguments S and s may name the same variables as X or Y, and the
// macro's locals cannot collide with caller identifiers.
// Every argument is expanded more than once; pass side-effect-free expressions.
#define KNUTH2SUM(X, Y, S, s, vlen)                                            \
  do {                                                                         \
    VFLOAT _k2s_sum = __riscv_vfadd((X), (Y), (vlen));                         \
    VFLOAT _k2s_x_hat = __riscv_vfsub(_k2s_sum, (Y), (vlen));                  \
    VFLOAT _k2s_err = __riscv_vfadd(                                           \
        __riscv_vfsub((X), _k2s_x_hat, (vlen)),                                \
        __riscv_vfsub((Y), __riscv_vfsub(_k2s_sum, _k2s_x_hat, (vlen)),        \
                      (vlen)),                                                 \
        (vlen));                                                               \
    (S) = _k2s_sum;                                                            \
    (s) = _k2s_err;                                                            \
  } while (0)
139
139
140
// Convert the integer (fixed-point) vector X into a two-piece floating-point
// value scaled by `scale`:
//   y_hi = float(X)                    (rounded conversion)
//   y_lo = float(X - int(y_hi))        (what the first conversion lost)
// and then both pieces are multiplied by `scale`.
// NOTE(review): `scale` is presumably a power of two so the multiplications
// are exact -- confirm against callers.
// Arguments are expanded more than once; pass side-effect-free expressions.
#define FIX2FLT(X, scale, y_hi, y_lo, vlen)                                    \
  do {                                                                         \
    (y_hi) = __riscv_vfcvt_f((X), (vlen));                                     \
    (y_lo) = __riscv_vfcvt_f(                                                  \
        __riscv_vsub((X), __riscv_vfcvt_x((y_hi), (vlen)), (vlen)), (vlen));   \
    (y_hi) = __riscv_vfmul((y_hi), (scale), (vlen));                           \
    (y_lo) = __riscv_vfmul((y_lo), (scale), (vlen));                           \
  } while (0)
148
+
149
// Convert the two-piece floating-point value (x_hi, x_lo) to an integer
// (fixed-point) vector Y: each piece is multiplied by `scale`, converted to
// integer separately, and the two integer results are added.
// Arguments are expanded more than once; pass side-effect-free expressions.
#define FLT2FIX(x_hi, x_lo, scale, Y, vlen)                                    \
  do {                                                                         \
    (Y) = __riscv_vfcvt_x(__riscv_vfmul((x_hi), (scale), (vlen)), (vlen));     \
    (Y) = __riscv_vadd(                                                        \
        (Y), __riscv_vfcvt_x(__riscv_vfmul((x_lo), (scale), (vlen)), (vlen)),  \
        (vlen));                                                               \
  } while (0)
156
+
140
157
#define PROD_X1Y1 (x , y , prod_hi , prod_lo , vlen ) \
141
158
do { \
142
159
(prod_hi) = __riscv_vfmul((x), (y), (vlen)); \
@@ -158,6 +175,14 @@ union sui64_fp64 {
158
175
(prod_lo) = __riscv_vfmacc((prod_lo), (x_lo), (y_hi), (vlen)); \
159
176
} while (0)
160
177
178
// Square of the two-piece value (x_hi, x_lo), returned as (prod_hi, prod_lo):
//   prod_hi = round(x_hi * x_hi)
//   prod_lo = (x_hi * x_hi - prod_hi) + x_hi * x_lo + x_lo * x_hi
// The first term of prod_lo comes from a fused multiply-subtract, i.e. it is
// the rounding error of prod_hi. The x_lo * x_lo term is not included.
// prod_hi is written before x_hi is read again, so the outputs must not alias
// the inputs. Arguments are expanded more than once.
#define SQR_X2(x_hi, x_lo, prod_hi, prod_lo, vlen)                             \
  do {                                                                         \
    (prod_hi) = __riscv_vfmul((x_hi), (x_hi), (vlen));                         \
    (prod_lo) = __riscv_vfmsub((x_hi), (x_hi), (prod_hi), (vlen));             \
    (prod_lo) = __riscv_vfmacc((prod_lo), (x_hi), (x_lo), (vlen));             \
    (prod_lo) = __riscv_vfmacc((prod_lo), (x_lo), (x_hi), (vlen));             \
  } while (0)
185
+
161
186
#define DIV_N1D2 (numer , denom , delta_d , Q , q , vlen ) \
162
187
do { \
163
188
Q = __riscv_vfdiv((numer), (denom), (vlen)); \
@@ -233,13 +258,13 @@ union sui64_fp64 {
233
258
234
259
// Scale `num` in place by 2^exp, applied in two steps:
//   1. _n1 = exp >> 1 is shifted up by MAN_LEN and added to the raw bit
//      pattern of num (a direct adjustment of its exponent field);
//   2. _n2 = exp - _n1 is turned into a floating-point power of two via
//      (_n2 + EXP_BIAS) << MAN_LEN and multiplied in.
// NOTE(review): the split presumably keeps each step within the representable
// exponent range -- confirm; step 1 performs no overflow/underflow checks.
// The locals carry a leading underscore so they cannot shadow the caller's
// `num`/`exp` arguments. Arguments are expanded more than once.
#define FAST_LDEXP(num, exp, vlen)                                             \
  do {                                                                         \
    VINT _n1 = __riscv_vsra((exp), 1, (vlen));                                 \
    VINT _n2 = __riscv_vsub((exp), _n1, (vlen));                               \
    _n1 = __riscv_vsll(_n1, MAN_LEN, (vlen));                                  \
    (num) = I_AS_F(__riscv_vadd(F_AS_I((num)), _n1, (vlen)));                  \
    _n2 = __riscv_vadd(_n2, EXP_BIAS, (vlen));                                 \
    _n2 = __riscv_vsll(_n2, MAN_LEN, (vlen));                                  \
    (num) = __riscv_vfmul((num), I_AS_F(_n2), (vlen));                         \
  } while (0)
244
269
245
270
// Some of the functions have multiple implementations using different
@@ -414,6 +439,13 @@ union sui64_fp64 {
414
439
// expm1 "I"-suffixed variant: vset configuration header and the function name
// that RVVLM_EXPM1DI_STD_EPSIM expands to.
#define RVVLM_EXPM1DI_VSET_CONFIG "rvvlm_fp64m2.h"
#define RVVLM_EXPM1DI_STD_EPSIM rvvlm_expm1I
416
441
442
// FP64 expint1 function configuration
// Each pair gives the vset configuration header and the function name the
// public macro expands to; the "DI" pair is the variant whose prototype takes
// stride_x/stride_y arguments.
#define RVVLM_EXPINT1D_VSET_CONFIG "rvvlm_fp64m1.h"
#define RVVLM_EXPINT1D_STD rvvlm_expint1

#define RVVLM_EXPINT1DI_VSET_CONFIG "rvvlm_fp64m1.h"
#define RVVLM_EXPINT1DI_STD rvvlm_expint1I
448
+
417
449
// FP64 log function configuration
// Vset configuration header and the function name RVVLM_LOGD_TBL128 expands to.
#define RVVLM_LOGD_VSET_CONFIG "rvvlm_fp64m2.h"
#define RVVLM_LOGD_TBL128 rvvlm_logD_tbl128
@@ -671,6 +703,10 @@ void RVVLM_EXPM1D_STD_EPSIM(size_t x_len, const double *x, double *y);
671
703
// Variant of RVVLM_EXPM1D_STD_EPSIM that takes explicit strides for the input
// and output arrays.
//   x_len    - element count (presumably the number of inputs to process)
//   x        - read-only input array
//   stride_x - stride applied when stepping through x
//   y        - output array
//   stride_y - stride applied when stepping through y
// NOTE(review): stride units (elements vs. bytes) are not visible here --
// confirm against the implementation.
void RVVLM_EXPM1DI_STD_EPSIM(size_t x_len, const double *x, size_t stride_x,
                             double *y, size_t stride_y);
673
705
706
// FP64 expint1 entry points.
//   x_len - element count (presumably the number of inputs to process)
//   x     - read-only input array
//   y     - output array
// The "DI" variant additionally takes stride_x / stride_y for stepping
// through x and y.
// NOTE(review): stride units (elements vs. bytes) are not visible here --
// confirm against the implementation.
void RVVLM_EXPINT1D_STD(size_t x_len, const double *x, double *y);
void RVVLM_EXPINT1DI_STD(size_t x_len, const double *x, size_t stride_x,
                         double *y, size_t stride_y);
709
+
674
710
// FP64 log entry points.
//   x_len - element count (presumably the number of inputs to process)
//   x     - read-only input array
//   y     - output array
// The "DI" variant additionally takes stride_x / stride_y for stepping
// through x and y.
// NOTE(review): stride units (elements vs. bytes) are not visible here --
// confirm against the implementation.
void RVVLM_LOGD_TBL128(size_t x_len, const double *x, double *y);
void RVVLM_LOGDI_TBL128(size_t x_len, const double *x, size_t stride_x,
                        double *y, size_t stride_y);
0 commit comments