Skip to content

Commit bddd67f

Browse files
Merge pull request #29 from rivosinc/dev/PingTakPeterTang/gamma
add the FP64 true gamma function tgamma
2 parents 194eb48 + ca948ac commit bddd67f

29 files changed

+816
-143
lines changed

CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ set(PROJECT_SOURCES
9898
src/rvvlm_sinhDI.c
9999
src/rvvlm_tanhD.c
100100
src/rvvlm_tanhDI.c
101+
src/rvvlm_tgammaD.c
102+
src/rvvlm_tgammaDI.c
101103
)
102104

103105
add_library(vecm

include/rvvlm.h

+23
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,17 @@ union sui64_fp64 {
172172
(delta_Q) = __riscv_vfmul(_q, __riscv_vfrec7((denom), (vlen)), (vlen)); \
173173
} while (0)
174174

175+
#define ACC_DIV2_N2D2(numer, delta_n, denom, delta_d, Q, delta_Q, vlen) \
176+
do { \
177+
VFLOAT _recip, _q; \
178+
_recip = __riscv_vfrdiv((denom), 0x1.0p0, (vlen)); \
179+
(Q) = __riscv_vfmul((numer), _recip, (vlen)); \
180+
_q = __riscv_vfnmsub((Q), (denom), (numer), (vlen)); \
181+
_q = __riscv_vfnmsac(_q, (Q), (delta_d), (vlen)); \
182+
_q = __riscv_vfadd(_q, (delta_n), (vlen)); \
183+
(delta_Q) = __riscv_vfmul(_q, _recip, (vlen)); \
184+
} while (0)
185+
175186
#define SQRT2_X2(x, delta_x, r, delta_r, vlen) \
176187
do { \
177188
VFLOAT xx = __riscv_vfadd((x), (delta_x), (vlen)); \
@@ -469,6 +480,13 @@ union sui64_fp64 {
469480
#define RVVLM_TANPIDI_VSET_CONFIG "rvvlm_fp64m2.h"
470481
#define RVVLM_TANPIDI_MERGED rvvlm_tanpiI
471482

483+
// FP64 tgamma function configuration
484+
#define RVVLM_TGAMMAD_VSET_CONFIG "rvvlm_fp64m1.h"
485+
#define RVVLM_TGAMMAD_STD rvvlm_tgamma
486+
487+
#define RVVLM_TGAMMADI_VSET_CONFIG "rvvlm_fp64m1.h"
488+
#define RVVLM_TGAMMADI_STD rvvlm_tgammaI
489+
472490
// FP64 cosh function configuration
473491
#define RVVLM_COSHD_VSET_CONFIG "rvvlm_fp64m2.h"
474492
#define RVVLM_COSHD_STD rvvlm_coshD_std
@@ -499,6 +517,7 @@ extern int64_t expD_tbl64_fixedpt[64];
499517
extern int64_t logD_tbl128_fixedpt[128];
500518
extern double logtbl_4_powD_128_hi_lo[256];
501519
extern double dbl_2ovpi_tbl[28];
520+
extern int64_t factorial_fixedpt[180];
502521

503522
// Define the functions in the vector math library
504523
void RVVLM_ACOSD_FIXEDPT(size_t x_len, const double *x, double *y);
@@ -703,6 +722,10 @@ void RVVLM_TANHD_STD(size_t x_len, const double *x, double *y);
703722
void RVVLM_TANHDI_STD(size_t x_len, const double *x, size_t stride_x, double *y,
704723
size_t stride_y);
705724

725+
void RVVLM_TGAMMAD_STD(size_t x_len, const double *x, double *y);
726+
void RVVLM_TGAMMADI_STD(size_t x_len, const double *x, size_t stride_x,
727+
double *y, size_t stride_y);
728+
706729
#ifdef __cplusplus
707730
}
708731
#endif

include/rvvlm_gammafuncsD.h

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// SPDX-FileCopyrightText: 2023 Rivos Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
// gamma(+inf) = +inf; gamma(-inf/sNaN) is qNaN with invalid
6+
// gamma(qNaN) is qNaN
7+
// gamma(+-0) is +-inf and divide by 0
8+
// gamma(tiny) is 1/tiny
9+
#define EXCEPTION_HANDLING_TGAMMA(vx, special_args, vy_special, vlen) \
10+
do { \
11+
VUINT expo_x = __riscv_vand(__riscv_vsrl(F_AS_U((vx)), MAN_LEN, (vlen)), \
12+
0x7FF, (vlen)); \
13+
VBOOL x_small = __riscv_vmsltu(expo_x, EXP_BIAS - 60, (vlen)); \
14+
VBOOL x_InfNaN = __riscv_vmseq(expo_x, 0x7FF, (vlen)); \
15+
(special_args) = __riscv_vmor(x_small, x_InfNaN, (vlen)); \
16+
if (__riscv_vcpop((special_args), (vlen)) > 0) { \
17+
VUINT vclass = __riscv_vfclass((vx), (vlen)); \
18+
VBOOL x_negInf; \
19+
IDENTIFY(vclass, class_negInf, x_negInf, (vlen)); \
20+
(vx) = __riscv_vfmerge((vx), fp_sNaN, x_negInf, (vlen)); \
21+
VFLOAT y_tmp = __riscv_vfadd(x_InfNaN, (vx), (vx), (vlen)); \
22+
(vy_special) = __riscv_vmerge((vy_special), y_tmp, x_InfNaN, (vlen)); \
23+
y_tmp = __riscv_vfrdiv(x_small, (vx), fp_posOne, (vlen)); \
24+
(vy_special) = __riscv_vmerge((vy_special), y_tmp, x_small, (vlen)); \
25+
(vx) = __riscv_vfmerge((vx), fp_posOne, (special_args), (vlen)); \
26+
} \
27+
} while (0)

include/rvvlm_tgammaD.inc.h

+446
Large diffs are not rendered by default.

src/rvvlm_tgammaD.c

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// SPDX-FileCopyrightText: 2023 Rivos Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include <riscv_vector.h>
6+
#include <stdio.h>
7+
8+
#include "rvvlm.h"
9+
#define API_SIGNATURE API_SIGNATURE_11
10+
#define STRIDE UNIT_STRIDE
11+
12+
#include RVVLM_TGAMMAD_VSET_CONFIG
13+
14+
#include "rvvlm_gammafuncsD.h"
15+
16+
#include "rvvlm_tgammaD.inc.h"

src/rvvlm_tgammaDI.c

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// SPDX-FileCopyrightText: 2023 Rivos Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include <riscv_vector.h>
6+
#include <stdio.h>
7+
8+
#include "rvvlm.h"
9+
#define API_SIGNATURE API_SIGNATURE_11
10+
#define STRIDE GENERAL_STRIDE
11+
12+
#include RVVLM_TGAMMADI_VSET_CONFIG
13+
14+
#include "rvvlm_gammafuncsD.h"
15+
16+
#include "rvvlm_tgammaD.inc.h"

test/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ set(TEST_SOURCES
7979
src/test_sinhI.cpp
8080
src/test_tanh.cpp
8181
src/test_tanhI.cpp
82+
src/test_tgamma.cpp
83+
src/test_tgammaI.cpp
8284
)
8385

8486
add_executable(test_veclibm ${TEST_SOURCES})

test/include/test_infra.h

+5
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,8 @@ long double recip_scale(long double);
101101
long double erfl_prime(long double);
102102
long double erfcl_prime(long double);
103103
long double cdfnorml_prime(long double);
104+
long double log_4_stirling(long double);
105+
long double stirling_power(long double);
106+
long double stirling_correction(long double);
107+
long double tgammal_mod(long double);
108+
long double sinpix_by_pi(long double);

test/src/test_acos.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,21 @@ TEST(acos, test) {
1919
x_start = -0x1.0p-40;
2020
x_end = 0x1.0p-40;
2121
;
22-
nb_tests = 100000;
22+
nb_tests = 10000;
2323
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);
2424

2525
x_start = -0.5;
2626
x_end = 0.5;
27-
nb_tests = 100000;
27+
nb_tests = 10000;
2828
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);
2929

3030
x_start = 0.5;
3131
x_end = 1.0;
32-
nb_tests = 100000;
32+
nb_tests = 10000;
3333
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);
3434

3535
x_start = -1.0;
3636
x_end = -0.5;
37-
nb_tests = 100000;
37+
nb_tests = 10000;
3838
report_err_fp64(rvvlm_acos, acosl, x_start, x_end, nb_tests);
3939
}

test/src/test_acospi.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@ TEST(acospi, test) {
1818

1919
x_start = -0x1.0p-40;
2020
x_end = 0x1.0p-40;
21-
nb_tests = 100000;
21+
nb_tests = 10000;
2222
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);
2323

2424
x_start = -0.5;
2525
x_end = 0.5;
26-
nb_tests = 100000;
26+
nb_tests = 10000;
2727
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);
2828

2929
x_start = 0.5;
3030
x_end = 1.0;
31-
nb_tests = 100000;
31+
nb_tests = 10000;
3232
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);
3333

3434
x_start = -1.0;
3535
x_end = -0.5;
36-
nb_tests = 100000;
36+
nb_tests = 10000;
3737
report_err_fp64(rvvlm_acospi, acospil, x_start, x_end, nb_tests);
3838
}

test/src/test_asinh.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -18,41 +18,41 @@ TEST(asinh, test) {
1818

1919
x_start = 0x1.0p-40;
2020
x_end = 0x1.0p-35;
21-
nb_tests = 40000;
21+
nb_tests = 20000;
2222
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
2323

2424
x_start = -0x1.0p-35;
2525
x_end = -0x1.0p-40;
26-
nb_tests = 40000;
26+
nb_tests = 20000;
2727
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
2828

2929
x_start = 0x1.0p-20;
3030
x_end = 0x1.0p-10;
31-
nb_tests = 40000;
31+
nb_tests = 20000;
3232
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
3333

3434
x_start = 0x1.0p-6;
3535
x_end = 0x1.0p0;
36-
nb_tests = 40000;
36+
nb_tests = 20000;
3737
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
3838

3939
x_start = 0x1.0p0;
4040
x_end = 0x1.0p2;
41-
nb_tests = 40000;
41+
nb_tests = 20000;
4242
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
4343

4444
x_start = -0x1.0p0;
4545
x_end = -0x1.0p2;
46-
nb_tests = 40000;
46+
nb_tests = 20000;
4747
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
4848

4949
x_start = 0x1.0p490;
5050
x_end = 0x1.0p520;
51-
nb_tests = 40000;
51+
nb_tests = 20000;
5252
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
5353

5454
x_start = 0x1.0p1020;
5555
x_end = 0x1.FFFFFFFFFFp1023;
56-
nb_tests = 40000;
56+
nb_tests = 20000;
5757
report_err_fp64(rvvlm_asinh, asinhl, x_start, x_end, nb_tests);
5858
}

test/src/test_atan2.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ TEST(atan2, test) {
2424
nb_x = 8;
2525
y_start = 0x1.01p0;
2626
y_end = 0x1.fffp0;
27-
nb_y = 20000;
27+
nb_y = 2000;
2828
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
2929
nb_y, swap_xy);
3030

@@ -34,7 +34,7 @@ TEST(atan2, test) {
3434
nb_x = 8;
3535
y_start = 0x1.01p1020;
3636
y_end = 0x1.ffffffffp1020;
37-
nb_y = 20000;
37+
nb_y = 2000;
3838
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
3939
nb_y, swap_xy);
4040

@@ -44,7 +44,7 @@ TEST(atan2, test) {
4444
nb_x = 8;
4545
y_start = 0x1.01p-1020;
4646
y_end = 0x1.ffffffffp-1020;
47-
nb_y = 20000;
47+
nb_y = 2000;
4848
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
4949
nb_y, swap_xy);
5050

@@ -54,7 +54,7 @@ TEST(atan2, test) {
5454
nb_x = 8;
5555
y_start = 0x1.01p0;
5656
y_end = 0x1.fffp0;
57-
nb_y = 20000;
57+
nb_y = 2000;
5858
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
5959
nb_y, swap_xy);
6060

@@ -64,7 +64,7 @@ TEST(atan2, test) {
6464
nb_x = 8;
6565
y_start = 0x1.01p1020;
6666
y_end = 0x1.ffffffffp1020;
67-
nb_y = 20000;
67+
nb_y = 2000;
6868
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
6969
nb_y, swap_xy);
7070

@@ -74,7 +74,7 @@ TEST(atan2, test) {
7474
nb_x = 8;
7575
y_start = 0x1.01p-1020;
7676
y_end = 0x1.ffffffffp-1020;
77-
nb_y = 20000;
77+
nb_y = 2000;
7878
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
7979
nb_y, swap_xy);
8080

@@ -84,7 +84,7 @@ TEST(atan2, test) {
8484
nb_x = 8;
8585
y_start = -0x1.01p0;
8686
y_end = -0x1.fffp0;
87-
nb_y = 20000;
87+
nb_y = 2000;
8888
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
8989
nb_y, swap_xy);
9090

@@ -94,7 +94,7 @@ TEST(atan2, test) {
9494
nb_x = 8;
9595
y_start = -0x1.01p1020;
9696
y_end = -0x1.ffffffffp1020;
97-
nb_y = 20000;
97+
nb_y = 2000;
9898
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
9999
nb_y, swap_xy);
100100

@@ -104,7 +104,7 @@ TEST(atan2, test) {
104104
nb_x = 8;
105105
y_start = -0x1.01p-1020;
106106
y_end = -0x1.ffffffffp-1020;
107-
nb_y = 200000;
107+
nb_y = 2000;
108108
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
109109
nb_y, swap_xy);
110110

@@ -114,7 +114,7 @@ TEST(atan2, test) {
114114
nb_x = 8;
115115
y_start = -0x1.01p0;
116116
y_end = -0x1.fffp0;
117-
nb_y = 20000;
117+
nb_y = 2000;
118118
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
119119
nb_y, swap_xy);
120120

@@ -124,7 +124,7 @@ TEST(atan2, test) {
124124
nb_x = 8;
125125
y_start = -0x1.01p1020;
126126
y_end = -0x1.ffffffffp1020;
127-
nb_y = 20000;
127+
nb_y = 2000;
128128
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
129129
nb_y, swap_xy);
130130

@@ -134,7 +134,7 @@ TEST(atan2, test) {
134134
nb_x = 8;
135135
y_start = -0x1.01p-1020;
136136
y_end = -0x1.ffffffffp-1020;
137-
nb_y = 20000;
137+
nb_y = 2000;
138138
report_err2_fp64(rvvlm_atan2, atan2l, x_start, x_end, nb_x, y_start, y_end,
139139
nb_y, swap_xy);
140140
}

0 commit comments

Comments
 (0)