Skip to content

Commit

Permalink
Merge pull request #79 from ocaml/no-asm
Browse files Browse the repository at this point in the history
Writing the fast path in OCaml instead of in assembly language
  • Loading branch information
xavierleroy authored Jan 4, 2021
2 parents bee604e + d8b77d5 commit 0fd2478
Show file tree
Hide file tree
Showing 8 changed files with 326 additions and 1,558 deletions.
118 changes: 88 additions & 30 deletions caml_z.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ extern "C" {

#ifdef _MSC_VER
#include <float.h>
#include <intrin.h>
#endif

/* The "__has_builtin" special macro from Clang */
#ifdef __has_builtin
#define HAS_BUILTIN(x) __has_builtin(x)
#else
#define HAS_BUILTIN(x) 0
#endif

/*---------------------------------------------------
Expand All @@ -71,6 +79,14 @@ extern "C" {
#define Z_FAST_PATH 1
#define Z_USE_NATINT 1

/* Whether the fast path (arguments and result are small integers)
has already be handled in OCaml, so that there is no need to
re-test for it in C functions.
Applies to: neg, abs, add, sub, mul, div, rem, succ, pred,
logand, logor, logxor, lognot, shifts, divexact.
*/
#define Z_FAST_PATH_IN_OCAML 1

/* Sanity checks. */
#define Z_PERFORM_CHECK 0

Expand Down Expand Up @@ -1267,7 +1283,7 @@ CAMLprim value ml_z_neg(value arg)
{
Z_MARK_OP;
Z_CHECK(arg);
#if Z_FAST_PATH && !defined(Z_ASM_neg)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg)) {
/* fast path */
if (arg > Val_long(Z_MIN_INT)) return 2 - arg;
Expand All @@ -1293,7 +1309,7 @@ CAMLprim value ml_z_abs(value arg)
{
Z_MARK_OP;
Z_CHECK(arg);
#if Z_FAST_PATH && !defined(Z_ASM_abs)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg)) {
/* fast path */
if (arg >= Val_long(0)) return arg;
Expand Down Expand Up @@ -1402,7 +1418,7 @@ CAMLprim value ml_z_add(value arg1, value arg2)
{
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_add)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
intnat a1 = Long_val(arg1);
Expand All @@ -1420,7 +1436,7 @@ CAMLprim value ml_z_sub(value arg1, value arg2)
{
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_sub)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
intnat a1 = Long_val(arg1);
Expand All @@ -1434,19 +1450,67 @@ CAMLprim value ml_z_sub(value arg1, value arg2)
return ml_z_addsub(arg1, arg2, Z_SIGN_MASK);
}

CAMLprim value ml_z_mul_overflows(value vx, value vy)
{
#if HAS_BUILTIN(__builtin_mul_overflow) || __GNUC__ >= 5
intnat z;
return Val_bool(__builtin_mul_overflow(vx - 1, vy >> 1, &z));
#elif defined(__GNUC__) && defined(__x86_64__)
intnat z;
unsigned char o;
asm("imulq %1, %3; seto %0"
: "=q" (o), "=r" (z)
: "1" (vx - 1), "r" (vy >> 1)
: "cc");
return Val_int(o);
#elif defined(_MSC_VER) && defined(_M_X64)
intnat hi, lo;
lo = _mul128(vx - 1, vy >> 1, &hi);
return Val_bool(hi != lo >> 63);
#else
/* Portable C code */
intnat x = Long_val(vx);
intnat y = Long_val(vy);
/* Quick approximate check for small values of x and y.
Also catches the cases x = 0, x = 1, y = 0, y = 1. */
if (Z_FITS_HINT(x)) {
if (Z_FITS_HINT(y)) return Val_false;
if ((uintnat) x <= 1) return Val_false;
}
if ((uintnat) y <= 1) return Val_false;
#if 1
/* Give up at this point; we'll go through the general case in ml_z_mul */
return Val_true;
#else
/* The product x*y is representable as an unboxed integer if
it is in [Z_MIN_INT, Z_MAX_INT].
x >= 0 y >= 0: x*y >= 0 and x*y <= Z_MAX_INT <-> y <= Z_MAX_INT / x
x < 0 y >= 0: x*y <= 0 and x*y >= Z_MIN_INT <-> x >= Z_MIN_INT / y
x >= 0 y < 0 : x*y <= 0 and x*y >= Z_MIN_INT <-> y >= Z_MIN_INT / x
x < 0 y < 0 : x*y >= 0 and x*y <= Z_MAX_INT <-> x >= Z_MAX_INT / y */
if (x >= 0)
if (y >= 0)
return Val_bool(y > Z_MAX_INT / x);
else
return Val_bool(y < Z_MIN_INT / x);
else
if (y >= 0)
return Val_bool(x < Z_MIN_INT / y);
else
return Val_bool(x < Z_MAX_INT / y);
#endif
#endif
}

CAMLprim value ml_z_mul(value arg1, value arg2)
{
Z_DECL(arg1); Z_DECL(arg2);
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_mul)
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
intnat a1 = Long_val(arg1);
intnat a2 = Long_val(arg2);
if (!a1 || !a2) return Val_long(0);
/* small argument case */
if (Z_FITS_HINT(arg1) && Z_FITS_HINT(arg2)) return Val_long(a1 * a2);
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2) &&
ml_z_mul_overflows(arg1, arg2) == Val_false) {
return Val_long(Long_val(a1) * Long_val(a2));
}
#endif
/* mpn_ version */
Expand Down Expand Up @@ -1554,7 +1618,7 @@ CAMLprim value ml_z_div(value arg1, value arg2)
{
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_div)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
intnat a1 = Long_val(arg1);
Expand All @@ -1574,7 +1638,7 @@ CAMLprim value ml_z_rem(value arg1, value arg2)
{
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_rem)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
intnat a1 = Long_val(arg1);
Expand Down Expand Up @@ -1706,7 +1770,7 @@ CAMLprim value ml_z_succ(value arg)
{
Z_MARK_OP;
Z_CHECK(arg);
#if Z_FAST_PATH && !defined(Z_ASM_succ)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg)) {
/* fast path */
if (arg < Val_long(Z_MAX_INT)) return arg + 2;
Expand All @@ -1721,7 +1785,7 @@ CAMLprim value ml_z_pred(value arg)
{
Z_MARK_OP;
Z_CHECK(arg);
#if Z_FAST_PATH && !defined(Z_ASM_pred)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg)) {
/* fast path */
if (arg > Val_long(Z_MIN_INT)) return arg - 2;
Expand Down Expand Up @@ -1929,7 +1993,7 @@ CAMLprim value ml_z_logand(value arg1, value arg2)
{
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_logand)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
return arg1 & arg2;
Expand Down Expand Up @@ -2011,7 +2075,7 @@ CAMLprim value ml_z_logor(value arg1, value arg2)
{
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_logor)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
return arg1 | arg2;
Expand Down Expand Up @@ -2097,7 +2161,7 @@ CAMLprim value ml_z_logxor(value arg1, value arg2)
{
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH && !defined(Z_ASM_logxor)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
return (arg1 ^ arg2) | 1;
Expand Down Expand Up @@ -2183,7 +2247,7 @@ CAMLprim value ml_z_lognot(value arg)
{
Z_MARK_OP;
Z_CHECK(arg);
#if Z_FAST_PATH && !defined(Z_ASM_lognot)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg)) {
/* fast path */
return (~arg) | 1;
Expand Down Expand Up @@ -2232,7 +2296,7 @@ CAMLprim value ml_z_shift_left(value arg, value count)
if (!c) return arg;
c1 = c / Z_LIMB_BITS;
c2 = c % Z_LIMB_BITS;
#if Z_FAST_PATH && !defined(Z_ASM_shift_left)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg) && !c1) {
/* fast path */
value a = arg - 1;
Expand Down Expand Up @@ -2281,7 +2345,7 @@ CAMLprim value ml_z_shift_right(value arg, value count)
if (!c) return arg;
c1 = c / Z_LIMB_BITS;
c2 = c % Z_LIMB_BITS;
#if Z_FAST_PATH && !defined(Z_ASM_shift_right)
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg)) {
/* fast path */
if (c1) {
Expand Down Expand Up @@ -2342,7 +2406,7 @@ CAMLprim value ml_z_shift_right_trunc(value arg, value count)
if (!c) return arg;
c1 = c / Z_LIMB_BITS;
c2 = c % Z_LIMB_BITS;
#if Z_FAST_PATH
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg)) {
/* fast path */
if (c1) return Val_long(0);
Expand Down Expand Up @@ -2661,7 +2725,7 @@ CAMLprim value ml_z_divexact(value arg1, value arg2)
Z_DECL(arg1); Z_DECL(arg2);
Z_MARK_OP;
Z_CHECK(arg1); Z_CHECK(arg2);
#if Z_FAST_PATH
#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
if (Is_long(arg1) && Is_long(arg2)) {
/* fast path */
intnat a1 = Long_val(arg1);
Expand Down Expand Up @@ -3367,12 +3431,6 @@ static void ml_z_dump_count()
}
#endif

CAMLprim value ml_z_install_frametable()
{
/* nothing to do for bytecode version */
return Val_unit;
}

CAMLprim value ml_z_init()
{
ml_z_2p32 = ldexp(1., 32);
Expand Down
Loading

0 comments on commit 0fd2478

Please sign in to comment.