From 973de044b10a389a45c68b4002a696e99b8874a7 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Mon, 4 Jan 2021 12:53:20 +0100 Subject: [PATCH 1/4] Remove install_frame_tables This was empty code. --- caml_z.c | 6 ------ z.mlp | 3 --- 2 files changed, 9 deletions(-) diff --git a/caml_z.c b/caml_z.c index f34c55c..9867de7 100644 --- a/caml_z.c +++ b/caml_z.c @@ -3367,12 +3367,6 @@ static void ml_z_dump_count() } #endif -CAMLprim value ml_z_install_frametable() -{ - /* nothing to do for bytecode version */ - return Val_unit; -} - CAMLprim value ml_z_init() { ml_z_2p32 = ldexp(1., 32); diff --git a/z.mlp b/z.mlp index 09377ab..1e979d4 100644 --- a/z.mlp +++ b/z.mlp @@ -23,9 +23,6 @@ exception Overflow external init: unit -> unit = "ml_z_init" let _ = init () -external install_frametable: unit -> unit = install_frametable@ASM -let _ = install_frametable () - let _ = Callback.register_exception "ml_z_overflow" Overflow external neg: t -> t = neg@ASM From f9a3d0d911c9662952c393b853b3cb2ca3cdd32c Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Mon, 4 Jan 2021 12:56:32 +0100 Subject: [PATCH 2/4] Program the fast paths in OCaml instead of in asm or in C Benefits: - The fast paths no longer incurs the overhead of caml_c_call. Hence, even though the OCaml fast-path code has more instructions than the hand-coded asm code, Z arithmetic gets faster on small integers. - For multiplication, the test for overflow cannot be efficiently implemented in OCaml. We call a C function of the "noalloc" kind, again avoiding caml_c_call. The C function uses the __builtin_mul_overflow from GCC >=5 and from Clang if available, or the _mul128 intrinsic from MSVC. Again, this executes more instructions than the hand-coded asm code, but we still get comparable performance. - We get rid of the asm files, which were not small and painful to adapt to multiple ABIs for the same processor. --- caml_z.c | 112 ++++++++--- caml_z_arm.S | 265 -------------------------- caml_z_i686.S | 406 ---------------------------------------- caml_z_x86_64.S | 396 --------------------------------------- caml_z_x86_64_mingw64.S | 381 ------------------------------------- z.mlip | 79 ++++---- z.mlp | 225 ++++++++++++++++++---- 7 files changed, 321 insertions(+), 1543 deletions(-) delete mode 100644 caml_z_arm.S delete mode 100644 caml_z_i686.S delete mode 100644 caml_z_x86_64.S delete mode 100644 caml_z_x86_64_mingw64.S diff --git a/caml_z.c b/caml_z.c index 9867de7..da0957d 100644 --- a/caml_z.c +++ b/caml_z.c @@ -58,6 +58,14 @@ extern "C" { #ifdef _MSC_VER #include +#include +#endif + +/* The "__has_builtin" special macro from Clang */ +#ifdef __has_builtin +#define HAS_BUILTIN(x) __has_builtin(x) +#else +#define HAS_BUILTIN(x) 0 #endif /*--------------------------------------------------- @@ -71,6 +79,14 @@ extern "C" { #define Z_FAST_PATH 1 #define Z_USE_NATINT 1 +/* Whether the fast path (arguments and result are small integers) + has already be handled in OCaml, so that there is no need to + re-test for it in C functions. + Applies to: neg, abs, add, sub, mul, div, rem, succ, pred, + logand, logor, logxor, lognot, shifts, divexact. +*/ +#define Z_FAST_PATH_IN_OCAML 1 + /* Sanity checks. */ #define Z_PERFORM_CHECK 0 @@ -1267,7 +1283,7 @@ CAMLprim value ml_z_neg(value arg) { Z_MARK_OP; Z_CHECK(arg); -#if Z_FAST_PATH && !defined(Z_ASM_neg) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg)) { /* fast path */ if (arg > Val_long(Z_MIN_INT)) return 2 - arg; @@ -1293,7 +1309,7 @@ CAMLprim value ml_z_abs(value arg) { Z_MARK_OP; Z_CHECK(arg); -#if Z_FAST_PATH && !defined(Z_ASM_abs) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg)) { /* fast path */ if (arg >= Val_long(0)) return arg; @@ -1402,7 +1418,7 @@ CAMLprim value ml_z_add(value arg1, value arg2) { Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_add) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ intnat a1 = Long_val(arg1); @@ -1420,7 +1436,7 @@ CAMLprim value ml_z_sub(value arg1, value arg2) { Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_sub) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ intnat a1 = Long_val(arg1); @@ -1434,19 +1450,67 @@ CAMLprim value ml_z_sub(value arg1, value arg2) return ml_z_addsub(arg1, arg2, Z_SIGN_MASK); } +CAMLprim value ml_z_mul_overflows(value vx, value vy) +{ +#if HAS_BUILTIN(__builtin_mul_overflow) || __GNUC__ >= 5 + intnat z; + return Val_bool(__builtin_mul_overflow(vx - 1, vy >> 1, &z)); +#elif defined(__GNUC__) && defined(__x86_64__) + intnat z; + unsigned char o; + asm("imulq %1, %3; seto %0" + : "=q" (o), "=r" (z) + : "1" (vx - 1), "r" (vy >> 1) + : "cc"); + return Val_int(o); +#elif defined(_MSC_VER) && defined(_M_X64) + intnat hi, lo; + lo = _mul128(vx - 1, vy >> 1, &hi); + return Val_bool(hi != lo >> 63); +#else + /* Portable C code */ + intnat x = Long_val(vx); + intnat y = Long_val(vy); + /* Quick approximate check for small values of x and y. + Also catches the cases x = 0, x = 1, y = 0, y = 1. */ + if (Z_FITS_HINT(x)) { + if (Z_FITS_HINT(y)) return Val_false; + if ((uintnat) x <= 1) return Val_false; + } + if ((uintnat) y <= 1) return Val_false; +#if 1 + /* Give up at this point; we'll go through the general case in ml_z_mul */ + return Val_true; +#else + /* The product x*y is representable as an unboxed integer if + it is in [Z_MIN_INT, Z_MAX_INT]. + x >= 0 y >= 0: x*y >= 0 and x*y <= Z_MAX_INT <-> y <= Z_MAX_INT / x + x < 0 y >= 0: x*y <= 0 and x*y >= Z_MIN_INT <-> x >= Z_MIN_INT / y + x >= 0 y < 0 : x*y <= 0 and x*y >= Z_MIN_INT <-> y >= Z_MIN_INT / x + x < 0 y < 0 : x*y >= 0 and x*y <= Z_MAX_INT <-> x >= Z_MAX_INT / y */ + if (x >= 0) + if (y >= 0) + return Val_bool(y > Z_MAX_INT / x); + else + return Val_bool(y < Z_MIN_INT / x); + else + if (y >= 0) + return Val_bool(x < Z_MIN_INT / y); + else + return Val_bool(x < Z_MAX_INT / y); +#endif +#endif +} + CAMLprim value ml_z_mul(value arg1, value arg2) { Z_DECL(arg1); Z_DECL(arg2); Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_mul) - if (Is_long(arg1) && Is_long(arg2)) { - /* fast path */ - intnat a1 = Long_val(arg1); - intnat a2 = Long_val(arg2); - if (!a1 || !a2) return Val_long(0); - /* small argument case */ - if (Z_FITS_HINT(arg1) && Z_FITS_HINT(arg2)) return Val_long(a1 * a2); +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML + if (Is_long(arg1) && Is_long(arg2) && + ml_z_mul_overflows(arg1, arg2) == Val_false) { + return Val_long(Long_val(a1) * Long_val(a2)); } #endif /* mpn_ version */ @@ -1554,7 +1618,7 @@ CAMLprim value ml_z_div(value arg1, value arg2) { Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_div) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ intnat a1 = Long_val(arg1); @@ -1574,7 +1638,7 @@ CAMLprim value ml_z_rem(value arg1, value arg2) { Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_rem) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ intnat a1 = Long_val(arg1); @@ -1706,7 +1770,7 @@ CAMLprim value ml_z_succ(value arg) { Z_MARK_OP; Z_CHECK(arg); -#if Z_FAST_PATH && !defined(Z_ASM_succ) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg)) { /* fast path */ if (arg < Val_long(Z_MAX_INT)) return arg + 2; @@ -1721,7 +1785,7 @@ CAMLprim value ml_z_pred(value arg) { Z_MARK_OP; Z_CHECK(arg); -#if Z_FAST_PATH && !defined(Z_ASM_pred) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg)) { /* fast path */ if (arg > Val_long(Z_MIN_INT)) return arg - 2; @@ -1929,7 +1993,7 @@ CAMLprim value ml_z_logand(value arg1, value arg2) { Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_logand) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ return arg1 & arg2; @@ -2011,7 +2075,7 @@ CAMLprim value ml_z_logor(value arg1, value arg2) { Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_logor) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ return arg1 | arg2; @@ -2097,7 +2161,7 @@ CAMLprim value ml_z_logxor(value arg1, value arg2) { Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH && !defined(Z_ASM_logxor) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ return (arg1 ^ arg2) | 1; @@ -2183,7 +2247,7 @@ CAMLprim value ml_z_lognot(value arg) { Z_MARK_OP; Z_CHECK(arg); -#if Z_FAST_PATH && !defined(Z_ASM_lognot) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg)) { /* fast path */ return (~arg) | 1; @@ -2232,7 +2296,7 @@ CAMLprim value ml_z_shift_left(value arg, value count) if (!c) return arg; c1 = c / Z_LIMB_BITS; c2 = c % Z_LIMB_BITS; -#if Z_FAST_PATH && !defined(Z_ASM_shift_left) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg) && !c1) { /* fast path */ value a = arg - 1; @@ -2281,7 +2345,7 @@ CAMLprim value ml_z_shift_right(value arg, value count) if (!c) return arg; c1 = c / Z_LIMB_BITS; c2 = c % Z_LIMB_BITS; -#if Z_FAST_PATH && !defined(Z_ASM_shift_right) +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg)) { /* fast path */ if (c1) { @@ -2342,7 +2406,7 @@ CAMLprim value ml_z_shift_right_trunc(value arg, value count) if (!c) return arg; c1 = c / Z_LIMB_BITS; c2 = c % Z_LIMB_BITS; -#if Z_FAST_PATH +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg)) { /* fast path */ if (c1) return Val_long(0); @@ -2661,7 +2725,7 @@ CAMLprim value ml_z_divexact(value arg1, value arg2) Z_DECL(arg1); Z_DECL(arg2); Z_MARK_OP; Z_CHECK(arg1); Z_CHECK(arg2); -#if Z_FAST_PATH +#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML if (Is_long(arg1) && Is_long(arg2)) { /* fast path */ intnat a1 = Long_val(arg1); diff --git a/caml_z_arm.S b/caml_z_arm.S deleted file mode 100644 index 9c88451..0000000 --- a/caml_z_arm.S +++ /dev/null @@ -1,265 +0,0 @@ -/* - Assembly version for the fast path of some functions in Z: - - ARM v5M and above target - - System 5 ABI and assembly syntax - - GNU as - - - This file is part of the Zarith library - http://forge.ocamlcore.org/projects/zarith . - It is distributed under LGPL 2 licensing, with static linking exception. - See the LICENSE file included in the distribution. - - Copyright (c) 2013 Xavier Leroy, INRIA Paris-Rocquencourt, - and Antoine Miné, Abstraction project. - Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS), - a joint laboratory by: - CNRS (Centre national de la recherche scientifique, France), - ENS (École normale supérieure, Paris, France), - INRIA Rocquencourt (Institut national de recherche en informatique, France). - - */ - - - /* makes the stack non-executable. */ - .section .note.GNU-stack,"",%progbits - - - /* helper functions */ - /* **************** */ - - - /* dot prefix for local labels */ - -#define L(x) .L##x - - /* function prolog & epilog */ - -#define PROLOG(proc) \ - .text; \ - .global ml_as_z_##proc; \ - .type ml_as_z_##proc, %function; \ -ml_as_z_##proc: - -#define EPILOG(proc) \ - .size ml_as_z_##proc, . - ml_as_z_##proc - - /* calling C functions */ - -#define C_JMP(proc) \ - b ml_z_##proc(PLT) - - /* operation counter (not implemented) */ - -#define OP - - /* unary arithmetics */ - /* ***************** */ - - /* neg */ - PROLOG(neg) -L(negenter): - tst r0, #1 - beq L(neg) - rsbs r1, r0, #2 /* r1 = 2 - r0 */ - bvs L(neg) - mov r0, r1 - OP - bx lr -L(neg): - C_JMP(neg) - EPILOG(neg) - - - /* abs */ - PROLOG(abs) - tst r0, #1 - beq L(abs) - cmp r0, #0 - bge L(abs2) - rsbs r1, r0, #2 /* r1 = 2 - r0 */ - bvs L(abs) - mov r0, r1 -L(abs2): - OP - bx lr -L(abs): - C_JMP(abs) - EPILOG(abs) - - - /* succ */ - PROLOG(succ) - tst r0, #1 - beq L(succ) - adds r1, r0, #2 - bvs L(succ) - mov r0, r1 - OP - bx lr -L(succ): - C_JMP(succ) - EPILOG(succ) - - - /* pred */ - PROLOG(pred) - tst r0, #1 - beq L(pred) - subs r1, r0, #2 - bvs L(pred) - mov r0, r1 - OP - bx lr -L(pred): - C_JMP(pred) - EPILOG(pred) - - - - - /* binary arithmetics */ - /* ****************** */ - - - /* add */ - PROLOG(add) - and r2, r0, r1 - tst r2, #1 - beq L(add) - sub r2, r0, #1 - adds r2, r2, r1 - bvs L(add) - mov r0, r2 - OP - bx lr -L(add): - C_JMP(add) - EPILOG(add) - - - /* sub */ - PROLOG(sub) - and r2, r0, r1 - tst r2, #1 - beq L(sub) - subs r2, r0, r1 - bvs L(sub) - add r0, r2, #1 - OP - bx lr -L(sub): - C_JMP(sub) - EPILOG(sub) - - - /* mul */ - PROLOG(mul) - and r2, r0, r1 - tst r2, #1 - beq L(mul) - sub r2, r0, #1 - mov r3, r1, asr #1 - smull r3, r12, r2, r3 /* r3 = low half of product, r12 = high half */ - cmp r12, r3, asr #31 /* high half must equal sign-ext of low half */ - bne L(mul) /* otherwise, overflow occurred */ - add r0, r3, #1 - OP - bx lr -L(mul): - C_JMP(mul) - EPILOG(mul) - - /* bit operations */ - /* ************** */ - - - /* not */ - PROLOG(lognot) - tst r0, #1 - beq L(lognot) - sub r0, r0, #1 - mvn r0, r0 - OP - bx lr -L(lognot): - C_JMP(lognot) - EPILOG(lognot) - - - /* and */ - PROLOG(logand) - and r2, r0, r1 - tst r2, #1 - beq L(logand) - mov r0, r2 - OP - bx lr -L(logand): - C_JMP(logand) - EPILOG(logand) - - - /* or */ - PROLOG(logor) - and r2, r0, r1 - tst r2, #1 - beq L(logor) - orr r0, r0, r1 - OP - bx lr -L(logor): - C_JMP(logor) - EPILOG(logor) - - - /* xor */ - PROLOG(logxor) - and r2, r0, r1 - tst r2, #1 - beq L(logxor) - eor r0, r0, r1 - orr r0, r0, #1 - OP - bx lr -L(logxor): - C_JMP(logxor) - EPILOG(logxor) - - - /* shift_left */ - PROLOG(shift_left) - tst r0, #1 - beq L(shift_left) - cmp r1, #63 /* 31 in 2n+1 encoding */ - bhs L(shift_left) - mov r3, r1, asr #1 - sub r2, r0, #1 - mov r12, r2, lsl r3 - cmp r2, r12, asr r3 - bne L(shift_left) /* overflow occurred */ - orr r0, r12, #1 - OP - bx lr -L(shift_left): - C_JMP(shift_left) - EPILOG(shift_left) - - - /* shift_right */ - PROLOG(shift_right) - tst r0, #1 - beq L(shift_right) - movs r2, r1, asr #1 - bmi L(shift_right) /* if shift amount < 0, go to slow path */ - cmp r2, #31 - movlo r0, r0, asr r2 /* if shift amount < 31, shift by this amount */ - movhs r0, r0, asr #31 /* if shift amount >= 31, shift by 31 */ - orr r0, r0, #1 - OP - bx lr -L(shift_right): - C_JMP(shift_right) - EPILOG(shift_right) - - diff --git a/caml_z_i686.S b/caml_z_i686.S deleted file mode 100644 index 83321db..0000000 --- a/caml_z_i686.S +++ /dev/null @@ -1,406 +0,0 @@ -/* - Assembly version for the fast path of some functions in Z: - - x86 target - - System 5 ABI and assembly syntax - - GNU as - - - This file is part of the Zarith library - http://forge.ocamlcore.org/projects/zarith . - It is distributed under LGPL 2 licensing, with static linking exception. - See the LICENSE file included in the distribution. - - Copyright (c) 2010-2011 Antoine Miné, Abstraction project. - Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS), - a joint laboratory by: - CNRS (Centre national de la recherche scientifique, France), - ENS (École normale supérieure, Paris, France), - INRIA Rocquencourt (Institut national de recherche en informatique, France). - - */ - - - /* makes the stack non-executable. */ -#ifdef Z_ELF - .section .note.GNU-stack,"",@progbits -#endif - - /* helper functions */ - /* **************** */ - - - /* optional underscope prefix for symbols */ -#ifdef Z_UNDERSCORE_PREFIX -#define SYMB(x) _##x -#else -#define SYMB(x) x -#endif - - - /* optional dot prefix for local labels */ -#ifdef Z_DOT_LABEL_PREFIX -#define L(x) .L##x -#else -#define L(x) L##x -#endif - - /* function prolog & epilog */ - -#if defined(Z_ELF) || defined(Z_COFF) -#define FUNCTION_ALIGN 16 -#endif -#if defined(Z_MACOS) -#define FUNCTION_ALIGN 4 -#endif - -#if defined(Z_ELF) -#define PROLOG(proc) \ - .text; \ - .globl SYMB(ml_as_z_##proc); \ - .type SYMB(ml_as_z_##proc), @function; \ - .align FUNCTION_ALIGN; \ -SYMB(ml_as_z_##proc): - -#define EPILOG(proc) \ - .size SYMB(ml_as_z_##proc), .-SYMB(ml_as_z_##proc) -#endif - -#if defined(Z_COFF) || defined(Z_MACOS) -#define PROLOG(proc) \ - .text; \ - .globl SYMB(ml_as_z_##proc); \ - .align FUNCTION_ALIGN; \ -SYMB(ml_as_z_##proc): - -#define EPILOG(proc) -#endif - - /* calling C functions */ - -#define C_JMP(proc) \ - jmp SYMB(ml_z_##proc) - - - /* operation counter */ - -#ifdef Z_PERF_COUNTER -#define OP \ - incl SYMB(ml_z_ops_as) -#else -#define OP -#endif - - - - /* unary arithmetics */ - /* ***************** */ - - /* neg */ - PROLOG(neg) -L(negenter): - mov 4(%esp), %eax - test $1, %al - jz L(neg) - not %eax - add $3, %eax - jo L(neg) - OP - ret -L(neg): - C_JMP(neg) - EPILOG(neg) - - - /* abs */ - PROLOG(abs) - mov 4(%esp), %eax - test $1, %al - jz L(abs) - test %eax, %eax - jns L(abs2) - not %eax - add $3, %eax - jo L(abs) -L(abs2): - OP - ret -L(abs): - C_JMP(abs) - EPILOG(abs) - - - /* succ */ - PROLOG(succ) - mov 4(%esp), %eax - test $1, %al - jz L(succ) - add $2, %eax - jo L(succ) - OP - ret -L(succ): - C_JMP(succ) - EPILOG(succ) - - - /* pred */ - PROLOG(pred) - mov 4(%esp), %eax - test $1, %al - jz L(pred) - sub $2, %eax - jo L(pred) - OP - ret -L(pred): - C_JMP(pred) - EPILOG(pred) - - - - - /* binary arithmetics */ - /* ****************** */ - - /* add */ - PROLOG(add) - mov 4(%esp), %eax - test $1, %al - jz L(add) - mov 8(%esp), %ecx - test $1, %cl - jz L(add) - dec %eax - add %ecx, %eax - jo L(add) - OP - ret -L(add): - C_JMP(add) - EPILOG(add) - - - /* sub */ - PROLOG(sub) - mov 4(%esp), %eax - test $1, %al - jz L(sub) - mov 8(%esp), %ecx - test $1, %cl - jz L(sub) - sub %ecx, %eax - jo L(sub) - inc %eax - OP - ret -L(sub): - C_JMP(sub) - EPILOG(sub) - - - /* mul */ - PROLOG(mul) - mov 4(%esp), %eax - test $1, %al - jz L(mul) - mov 8(%esp), %ecx - sar %ecx /* sets CF to least significant bit */ - jnc L(mul) - dec %eax - imul %ecx, %eax - jo L(mul) - inc %eax - OP - ret -L(mul): - C_JMP(mul) - EPILOG(mul) - - - /* div */ - PROLOG(div) - mov 8(%esp), %ecx - sar %ecx - jnc L(div) /* not a 31-bit integer */ - jz L(div) /* division by zero */ - cmp $-1, %ecx - /* division by -1, the only one that can overflow */ - je L(negenter) - mov 4(%esp), %eax - sar %eax - jnc L(div) /* not a 31-bit integer */ - cdq - idiv %ecx - lea 1(%eax, %eax), %eax - OP - ret -L(div): - C_JMP(div) - EPILOG(div) - - - /* divexacty */ - PROLOG(divexact) - mov 8(%esp), %ecx - sar %ecx - jnc L(divexact) /* not a 31-bit integer */ - jz L(divexact) /* division by zero */ - cmp $-1, %ecx - /* division by -1, the only one that can overflow */ - je L(negenter) - mov 4(%esp), %eax - sar %eax - jnc L(divexact) /* not a 31-bit integer */ - cdq - idiv %ecx - lea 1(%eax, %eax), %eax - OP - ret -L(divexact): - C_JMP(divexact) - EPILOG(divexact) - - - /* rem */ - PROLOG(rem) - mov 4(%esp), %eax - sar %eax - jnc L(rem) /* not a 31-bit integer */ - mov 8(%esp), %ecx - sar %ecx - jnc L(rem) /* not a 31-bit integer */ - jz L(rem) /* division by zero */ - cmp $-1, %ecx - je L(remneg) - cdq - idiv %ecx - lea 1(%edx, %edx), %eax - OP - ret -L(remneg): - /* division by -1 */ - mov $1, %eax - OP - ret -L(rem): - C_JMP(rem) - EPILOG(rem) - - - /* bit operations */ - /* ************** */ - - - /* not */ - PROLOG(lognot) - mov 4(%esp), %eax - test $1, %al - jz L(lognot) - dec %eax - not %eax - ret -L(lognot): - C_JMP(lognot) - EPILOG(lognot) - - - /* or */ - PROLOG(logor) - mov 4(%esp), %eax - test $1, %al - jz L(logor) - mov 8(%esp), %ecx - test $1, %cl - jz L(logor) - or %ecx, %eax - OP - ret -L(logor): - C_JMP(logor) - EPILOG(logor) - - /* and */ - PROLOG(logand) - mov 4(%esp), %eax - test $1, %al - jz L(logand) - mov 8(%esp), %ecx - test $1, %cl - jz L(logand) - and %ecx, %eax - OP - ret -L(logand): - C_JMP(logand) - EPILOG(logand) - - - /* xor */ - PROLOG(logxor) - mov 4(%esp), %eax - test $1, %al - jz L(logxor) - mov 8(%esp), %ecx - test $1, %cl - jz L(logxor) - xor %ecx, %eax - inc %eax - OP - ret -L(logxor): - C_JMP(logxor) - EPILOG(logxor) - - - /* shift_left */ - PROLOG(shift_left) - mov 4(%esp), %eax - test $1, %al - jz L(shift_left) - mov 8(%esp), %ecx - sar %ecx - cmp $31, %ecx - jae L(shift_left) - dec %eax - sal %cl, %eax - mov %eax, %edx - sar %cl, %edx - inc %edx - cmp 4(%esp), %edx - jne L(shift_left) /* overflow */ - inc %eax - OP - ret -L(shift_left): - C_JMP(shift_left) - EPILOG(shift_left) - - - /* shift_right */ - PROLOG(shift_right) - mov 4(%esp), %eax - test $1, %al - jz L(shift_right) - mov 8(%esp), %ecx - sar %ecx - js L(shift_right) - cmp $31, %ecx - jae L(shift_right2) - sar %cl, %eax - or $1, %eax - OP - ret -L(shift_right2): - /* shift by 31 or more */ - test %eax, %eax - js L(shift_right3) - mov $1, %eax - OP - ret -L(shift_right3): - mov $-1, %eax - OP - ret -L(shift_right): - C_JMP(shift_right) - EPILOG(shift_right) diff --git a/caml_z_x86_64.S b/caml_z_x86_64.S deleted file mode 100644 index 13ebbee..0000000 --- a/caml_z_x86_64.S +++ /dev/null @@ -1,396 +0,0 @@ -/* - Assembly version for the fast path of some functions in Z: - - x86_64 target - - System 5 ABI and assembly syntax - - GNU as - - - This file is part of the Zarith library - http://forge.ocamlcore.org/projects/zarith . - It is distributed under LGPL 2 licensing, with static linking exception. - See the LICENSE file included in the distribution. - - Copyright (c) 2010-2011 Antoine Miné, Abstraction project. - Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS), - a joint laboratory by: - CNRS (Centre national de la recherche scientifique, France), - ENS (École normale supérieure, Paris, France), - INRIA Rocquencourt (Institut national de recherche en informatique, France). - - */ - - - /* makes the stack non-executable. */ -#ifdef Z_ELF - .section .note.GNU-stack,"",@progbits -#endif - - /* helper functions */ - /* **************** */ - - - /* optional underscope prefix for symbols */ -#ifdef Z_UNDERSCORE_PREFIX -#define SYMB(x) _##x -#else -#define SYMB(x) x -#endif - - /* optional dot prefix for local labels */ -#ifdef Z_DOT_LABEL_PREFIX -#define L(x) .L##x -#else -#define L(x) L##x -#endif - - /* function prolog & epilog */ - -#if defined(Z_ELF) -#define FUNCTION_ALIGN 16 -#endif -#if defined(Z_MACOS) -#define FUNCTION_ALIGN 4 -#endif - -#if defined(Z_ELF) -#define PROLOG(proc) \ - .text; \ - .globl SYMB(ml_as_z_##proc); \ - .type SYMB(ml_as_z_##proc), @function; \ - .align FUNCTION_ALIGN; \ -SYMB(ml_as_z_##proc): - -#define EPILOG(proc) \ - .size SYMB(ml_as_z_##proc), .-SYMB(ml_as_z_##proc) -#endif - -#if defined(Z_MACOS) -#define PROLOG(proc) \ - .text; \ - .globl SYMB(ml_as_z_##proc); \ - .align FUNCTION_ALIGN; \ -SYMB(ml_as_z_##proc): - -#define EPILOG(proc) - -#endif - - /* calling C functions */ - -#if defined(Z_ELF) -#define C_JMP(proc) \ - jmp SYMB(ml_z_##proc@PLT) -#endif - -#if defined(Z_MACOS) -#define C_JMP(proc) \ - jmp SYMB(ml_z_##proc) -#endif - - /* operation counter */ - -#ifndef Z_PERF_COUNTER -#define OP -#else -#if defined(Z_ELF) || defined(Z_MACOS) -#define OP \ - mov SYMB(ml_z_ops_as@GOTPCREL(%rip)), %rcx; \ - addq $1, (%rcx) -#endif -#endif - - /* unary arithmetics */ - /* ***************** */ - - /* neg */ - PROLOG(neg) -L(negenter): - test $1, %dil - jz L(neg) - mov %rdi, %rax - not %rax - add $3, %rax - jo L(neg) - OP - ret -L(neg): - C_JMP(neg) - EPILOG(neg) - - - /* abs */ - PROLOG(abs) - test $1, %dil - jz L(abs) - mov %rdi, %rax - test %rdi, %rdi - jns L(abs2) - not %rax - add $3, %rax - jo L(neg) -L(abs2): - OP - ret -L(abs): - C_JMP(abs) - EPILOG(abs) - - - /* succ */ - PROLOG(succ) - test $1, %dil - jz L(succ) - mov %rdi, %rax - add $2, %rax - jo L(succ) - OP - ret -L(succ): - C_JMP(succ) - EPILOG(succ) - - - /* pred */ - PROLOG(pred) - test $1, %dil - jz L(pred) - mov %rdi, %rax - sub $2, %rax - jo L(pred) - OP - ret -L(pred): - C_JMP(pred) - EPILOG(pred) - - - - - /* binary arithmetics */ - /* ****************** */ - - - /* add */ - PROLOG(add) - test $1, %dil - jz L(add) - test $1, %sil - jz L(add) - lea -1(%rdi), %rax - add %rsi, %rax - jo L(add) - OP - ret -L(add): - C_JMP(add) - EPILOG(add) - - - /* sub */ - PROLOG(sub) - test $1, %dil - jz L(sub) - test $1, %sil - jz L(sub) - mov %rdi, %rax - sub %rsi, %rax - jo L(sub) - inc %rax - OP - ret -L(sub): - C_JMP(sub) - EPILOG(sub) - - - /* mul */ - PROLOG(mul) - test $1, %dil - jz L(mul) - mov %rsi, %rcx - sar %rcx - jnc L(mul) - lea -1(%rdi), %rax - imul %rcx, %rax - jo L(mul) - inc %rax - OP - ret -L(mul): - C_JMP(mul) - EPILOG(mul) - - - /* div */ - PROLOG(div) - mov %rsi, %rcx - cmp $-1, %rsi - /* division by -1, the only one that can overflow */ - je L(negenter) - sar %rcx - jnc L(div) - jz L(div) /* division by zero */ - mov %rdi, %rax - sar %rax - jnc L(div) - cqo - idiv %rcx - lea 1(%rax, %rax), %rax - OP - ret -L(div): - C_JMP(div) - EPILOG(div) - - - /* divexact */ - PROLOG(divexact) - mov %rsi, %rcx - cmp $-1, %rsi - /* division by -1, the only one that can overflow */ - je L(negenter) - sar %rcx - jnc L(divexact) - jz L(divexact) /* division by zero */ - mov %rdi, %rax - sar %rax - jnc L(divexact) - cqo - idiv %rcx - lea 1(%rax, %rax), %rax - OP - ret -L(divexact): - C_JMP(divexact) - EPILOG(divexact) - - - /* rem */ - PROLOG(rem) - mov %rdi, %rax - sar %rax - jnc L(rem) - mov %rsi, %rcx - sar %rcx - jnc L(rem) - jz L(rem) /* division by zero */ - cmp $-1, %rcx - je L(remneg) - cqo - idiv %rcx - lea 1(%rdx, %rdx), %rax - OP - ret -L(remneg): - /* division by -1 */ - mov $1, %eax - OP - ret -L(rem): - C_JMP(rem) - EPILOG(rem) - - - /* bit operations */ - /* ************** */ - - - /* not */ - PROLOG(lognot) - test $1, %dil - jz L(lognot) - lea -1(%rdi), %rax - not %rax - OP - ret -L(lognot): - C_JMP(lognot) - EPILOG(lognot) - - - /* and */ - PROLOG(logand) - mov %rdi, %rax - and %rsi, %rax - test $1, %al - jz L(logand) - OP - ret -L(logand): - C_JMP(logand) - EPILOG(logand) - - - /* or */ - PROLOG(logor) - test $1, %dil - jz L(logor) - test $1, %sil - jz L(logor) - mov %rdi, %rax - or %rsi, %rax - OP - ret -L(logor): - C_JMP(logor) - EPILOG(logor) - - - /* xor */ - PROLOG(logxor) - test $1, %dil - jz L(logxor) - test $1, %sil - jz L(logxor) - lea -1(%rdi), %rax - xor %rsi, %rax - OP - ret -L(logxor): - C_JMP(logxor) - EPILOG(logxor) - - - /* shift_left */ - PROLOG(shift_left) - test $1, %dil - jz L(shift_left) - mov %esi, %ecx - sar %ecx - cmp $127, %rsi /* 63 unboxed */ - jae L(shift_left) - lea -1(%rdi), %rax - mov %rax, %r8 - sal %cl, %rax - mov %rax, %rdx - sar %cl, %rdx - cmp %r8, %rdx - jne L(shift_left) /* overflow */ - inc %rax - OP - ret -L(shift_left): - C_JMP(shift_left) - EPILOG(shift_left) - - - /* shift_right */ - PROLOG(shift_right) - test $1, %dil - jz L(shift_right) - mov %rsi, %rcx - mov $63, %eax - sar %rcx - js L(shift_right) - cmp %rax, %rcx /* compare second argument to 63 */ - cmovae %eax, %ecx /* if above or equal, then use 63 */ - mov %rdi, %rax - sar %cl, %rax - or $1, %rax - OP - ret -L(shift_right): - C_JMP(shift_right) - EPILOG(shift_right) - - diff --git a/caml_z_x86_64_mingw64.S b/caml_z_x86_64_mingw64.S deleted file mode 100644 index a25ccf6..0000000 --- a/caml_z_x86_64_mingw64.S +++ /dev/null @@ -1,381 +0,0 @@ -/* - Assembly version for the fast path of some functions in Z: - - x86_64 target - - Win64 ABI - - GNU as - - - This file is part of the Zarith library - http://forge.ocamlcore.org/projects/zarith . - It is distributed under LGPL 2 licensing, with static linking exception. - See the LICENSE file included in the distribution. - - Copyright (c) 2010-2011 Antoine Miné, Abstraction project. - Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS), - a joint laboratory by: - CNRS (Centre national de la recherche scientifique, France), - ENS (École normale supérieure, Paris, France), - INRIA Rocquencourt (Institut national de recherche en informatique, France). - - */ - - - /* helper functions */ - /* **************** */ - - -#define SYMB(x) x -#define FUNCTION_ALIGN 16 - -#define PROLOG(proc) \ - .text; \ - .globl SYMB(ml_as_z_##proc); \ - .align FUNCTION_ALIGN; \ -SYMB(ml_as_z_##proc):\ - -#define EPILOG(proc) - -#define C_JMP(proc) \ - jmp SYMB(ml_z_##proc) - - - /* operation counter */ - -#ifndef Z_PERF_COUNTER -#define OP -#else -#define OP \ - mov SYMB(ml_z_ops_as(%rip)), %rcx; \ - addq $1, (%rcx) -#endif - - /* unary arithmetics */ - /* ***************** */ - - /* neg */ - PROLOG(neg) - test $1, %rcx - jz .Lneg - mov %rcx, %rax - not %rax - add $3, %rax - jo .Lneg - OP - ret -.Lneg: - C_JMP(neg) - EPILOG(neg) - - - /* abs */ - PROLOG(abs) - test $1, %rcx - jz .Labs - mov %rcx, %rax - test %rcx, %rcx - jns .Labs2 - not %rax - add $3, %rax - jo .Lneg -.Labs2: - OP - ret -.Labs: - C_JMP(abs) - EPILOG(abs) - - - /* succ */ - PROLOG(succ) - test $1, %rcx - jz .Lsucc - mov %rcx, %rax - add $2, %rax - jo .Lsucc - OP - ret -.Lsucc: - C_JMP(succ) - EPILOG(succ) - - - /* pred */ - PROLOG(pred) - test $1, %rcx - jz .Lpred - mov %rcx, %rax - sub $2, %rax - jo .Lpred - OP - ret -.Lpred: - C_JMP(pred) - EPILOG(pred) - - - - - /* binary arithmetics */ - /* ****************** */ - - - /* add */ - PROLOG(add) - test $1, %rcx - jz .Ladd - test $1, %rdx - jz .Ladd - lea -1(%rcx), %rax - add %rdx, %rax - jo .Ladd - OP - ret -.Ladd: - C_JMP(add) - EPILOG(add) - - - /* sub */ - PROLOG(sub) - test $1, %rcx - jz .Lsub - test $1, %rdx - jz .Lsub - mov %rcx, %rax - sub %rdx, %rax - jo .Lsub - inc %rax - OP - ret -.Lsub: - C_JMP(sub) - EPILOG(sub) - - - /* mul */ - PROLOG(mul) - test $1, %rcx - jz .Lmul - test $1, %rdx - jz .Lmul - lea -1(%rdx), %rax - mov %rcx, %r8 - sar %r8 - imul %r8, %rax - jo .Lmul - inc %rax - OP - ret -.Lmul: - C_JMP(mul) - EPILOG(mul) - - - /* div */ - PROLOG(div) - test $1, %rcx - jz .Ldiv - test $1, %rdx - jz .Ldiv - mov %rdx, %r8 - mov %rcx, %rax - sar %r8 - jz .Ldiv /* division by zero */ - cmp $-1, %r8 - je .Ldivneg - sar %rax - cqo - idiv %r8 - sal %rax - inc %rax - OP - ret -.Ldivneg: - /* division by -1, the only one that can overflow */ - not %rax - add $3, %rax - jo .Ldiv - OP - ret -.Ldiv: - C_JMP(div) - EPILOG(div) - - - /* divexact */ - PROLOG(divexact) - test $1, %rcx - jz .Ldivexact - test $1, %rdx - jz .Ldivexact - mov %rdx, %r8 - mov %rcx, %rax - sar %r8 - jz .Ldivexact /* division by zero */ - cmp $-1, %r8 - je .Ldivexactneg - sar %rax - cqo - idiv %r8 - sal %rax - inc %rax - OP - ret -.Ldivexactneg: - /* division by -1, the only one that can overflow */ - not %rax - add $3, %rax - jo .Ldivexact - OP - ret -.Ldivexact: - C_JMP(divexact) - EPILOG(divexact) - - - /* rem */ - PROLOG(rem) - test $1, %rcx - jz .Lrem - test $1, %rdx - jz .Lrem - mov %rdx, %r8 - mov %rcx, %rax - sar %r8 - jz .Lrem /* division by zero */ - cmp $-1, %r8 - je .Lremneg - sar %rax - cqo - idiv %r8 - sal %rdx - lea 1(%rdx), %rax - OP - ret -.Lremneg: - /* division by -1 */ - mov $1, %rax - OP - ret -.Lrem: - C_JMP(rem) - EPILOG(rem) - - - /* bit operations */ - /* ************** */ - - - /* not */ - PROLOG(lognot) - test $1, %rcx - jz .Llognot - lea -1(%rcx), %rax - not %rax - OP - ret -.Llognot: - C_JMP(lognot) - EPILOG(lognot) - - - /* and */ - PROLOG(logand) - mov %rcx, %rax - and %rdx, %rax - test $1, %rax - jz .Llogand - OP - ret -.Llogand: - C_JMP(logand) - EPILOG(logand) - - - /* or */ - PROLOG(logor) - test $1, %rcx - jz .Llogor - test $1, %rdx - jz .Llogor - mov %rcx, %rax - or %rdx, %rax - OP - ret -.Llogor: - C_JMP(logor) - EPILOG(logor) - - - /* xor */ - PROLOG(logxor) - test $1, %rcx - jz .Llogxor - test $1, %rdx - jz .Llogxor - lea -1(%rcx), %rax - xor %rdx, %rax - OP - ret -.Llogxor: - C_JMP(logxor) - EPILOG(logxor) - - - /* shift_left */ - PROLOG(shift_left) - test $1, %rcx - jz .Lshift_left2 - lea -1(%rcx), %rax - mov %rcx, %r9 - mov %rdx, %r10 - sar %rdx - cmp $63, %rdx - jae .Lshift_left - mov %rdx, %rcx - mov %rax, %r8 - sal %cl, %rax - mov %rax, %rdx - sar %cl, %rdx - cmp %r8, %rdx - jne .Lshift_left /* overflow */ - inc %rax - OP - ret -.Lshift_left: - mov %r9, %rcx - mov %r10, %rdx -.Lshift_left2: - C_JMP(shift_left) - EPILOG(shift_left) - - - /* shift_right */ - PROLOG(shift_right) - test $1, %rcx - jz .Lshift_right - mov %rcx, %rax - mov %rdx, %rcx - sar %rcx - js .Lshift_right - cmp $63, %rcx - jae .Lshift_right2 - sar %cl, %rax - or $1, %rax - OP - ret -.Lshift_right2: - /* shift by 63 or more */ - test %rax, %rax - js .Lshift_right3 - mov $1, %rax - OP - ret -.Lshift_right3: - mov $-1, %rax - OP - ret -.Lshift_right: - C_JMP(shift_right) - EPILOG(shift_right) - diff --git a/z.mlip b/z.mlip index fa1665d..69b5102 100644 --- a/z.mlip +++ b/z.mlip @@ -117,34 +117,34 @@ external of_substring_base (** {1 Basic arithmetic operations} *) -external succ: t -> t = succ@ASM +val succ: t -> t (** Returns its argument plus one. *) -external pred: t -> t = pred@ASM +val pred: t -> t (** Returns its argument minus one. *) -external abs: t -> t = abs@ASM +val abs: t -> t (** Absolute value. *) -external neg: t -> t = neg@ASM +val neg: t -> t (** Unary negation. *) -external add: t -> t -> t = add@ASM +val add: t -> t -> t (** Addition. *) -external sub: t -> t -> t = sub@ASM +val sub: t -> t -> t (** Subtraction. *) -external mul: t -> t -> t = mul@ASM +val mul: t -> t -> t (** Multiplication. *) -external div: t -> t -> t = div@ASM +val div: t -> t -> t (** Integer division. The result is truncated towards zero and obeys the rule of signs. Raises [Division_by_zero] if the divisor (second argument) is 0. *) -external rem: t -> t -> t = rem@ASM +val rem: t -> t -> t (** Integer remainder. Can raise a [Division_by_zero]. The result of [rem a b] has the sign of [a], and its absolute value is strictly smaller than the absolute value of [b]. @@ -185,7 +185,7 @@ val erem: t -> t -> t [a = b * ediv a b + erem a b]. Raises [Division_by_zero] if [b = 0]. *) -external divexact: t -> t -> t = divexact@ASM +val divexact: t -> t -> t (** [divexact a b] divides [a] by [b], only producing correct result when the division is exact, i.e., when [b] evenly divides [a]. It should be faster than general division. @@ -214,34 +214,34 @@ external congruent: t -> t -> t -> bool = "ml_z_congruent" 1s. *) -external logand: t -> t -> t = logand@ASM +val logand: t -> t -> t (** Bitwise logical and. *) - -external logor: t -> t -> t = logor@ASM + +val logor: t -> t -> t (** Bitwise logical or. *) - -external logxor: t -> t -> t = logxor@ASM + +val logxor: t -> t -> t (** Bitwise logical exclusive or. *) -external lognot: t -> t = lognot@ASM -(** Bitwise logical negation. +val lognot: t -> t +(** Bitwise logical negation. The identity [lognot a]=[-a-1] always hold. *) -external shift_left: t -> int -> t = shift_left@ASM -(** Shifts to the left. +val shift_left: t -> int -> t +(** Shifts to the left. Equivalent to a multiplication by a power of 2. The second argument must be nonnegative. *) -external shift_right: t -> int -> t = shift_right@ASM -(** Shifts to the right. - This is an arithmetic shift, +val shift_right: t -> int -> t +(** Shifts to the right. + This is an arithmetic shift, equivalent to a division by a power of 2 with rounding towards -oo. The second argument must be nonnegative. *) -external shift_right_trunc: t -> int -> t = shift_right_trunc@ASM +val shift_right_trunc: t -> int -> t (** Shifts to the right, rounding towards 0. This is equivalent to a division by a power of 2, with truncation. The second argument must be nonnegative. @@ -286,8 +286,8 @@ external hamdist: t -> t -> int = "ml_z_hamdist" converted value, an [Overflow] exception is raised. *) -external to_int: t -> int = "ml_z_to_int" -(** Converts to a base integer. May raise [Overflow]. *) +val to_int: t -> int +(** Converts to a base integer. May raise an [Overflow]. *) external to_int32: t -> int32 = "ml_z_to_int32" (** Converts to a 32-bit integer. May raise [Overflow]. *) @@ -632,22 +632,22 @@ external of_bits: string -> t = "ml_z_of_bits" [Z.(~$2 + ~$5 * ~$10)]. *) -external (~-): t -> t = neg@ASM +val (~-): t -> t (** Negation [neg]. *) val (~+): t -> t (** Identity. *) -external (+): t -> t -> t = add@ASM +val (+): t -> t -> t (** Addition [add]. *) -external (-): t -> t -> t = sub@ASM +val (-): t -> t -> t (** Subtraction [sub]. *) -external ( * ): t -> t -> t = mul@ASM +val ( * ): t -> t -> t (** Multiplication [mul]. *) -external (/): t -> t -> t = div@ASM +val (/): t -> t -> t (** Truncated division [div]. *) external (/>): t -> t -> t = "ml_z_cdiv" @@ -656,31 +656,32 @@ external (/>): t -> t -> t = "ml_z_cdiv" external (/<): t -> t -> t = "ml_z_fdiv" (** Flooring division [fdiv]. *) -external (/|): t -> t -> t = divexact@ASM -(** Exact division [div_exact]. *) +val (/|): t -> t -> t +(** Exact division [divexact]. *) -external (mod): t -> t -> t = rem@ASM +val (mod): t -> t -> t (** Remainder [rem]. *) -external (land): t -> t -> t = logand@ASM +val (land): t -> t -> t (** Bit-wise logical and [logand]. *) -external (lor): t -> t -> t = logor@ASM +val (lor): t -> t -> t (** Bit-wise logical inclusive or [logor]. *) -external (lxor): t -> t -> t = logxor@ASM +val (lxor): t -> t -> t (** Bit-wise logical exclusive or [logxor]. *) -external (~!): t -> t = lognot@ASM +val (~!): t -> t (** Bit-wise logical negation [lognot]. *) -external (lsl): t -> int -> t = shift_left@ASM +val (lsl): t -> int -> t (** Bit-wise shift to the left [shift_left]. *) -external (asr): t -> int -> t = shift_right@ASM +val (asr): t -> int -> t (** Bit-wise shift to the right [shift_right]. *) external (~$): int -> t = "ml_z_of_int" @NOALLOC + (** Conversion from [int] [of_int]. *) external ( ** ): t -> int -> t = "ml_z_pow" diff --git a/z.mlp b/z.mlp index 1e979d4..8660f57 100644 --- a/z.mlp +++ b/z.mlp @@ -25,31 +25,193 @@ let _ = init () let _ = Callback.register_exception "ml_z_overflow" Overflow -external neg: t -> t = neg@ASM -external add: t -> t -> t = add@ASM -external sub: t -> t -> t = sub@ASM -external mul: t -> t -> t = mul@ASM -external div: t -> t -> t = div@ASM +external is_small_int: t -> bool = "%obj_is_int" +external unsafe_to_int: t -> int = "%identity" +external unsafe_of_int: int -> t = "%identity" + +external c_neg: t -> t = "ml_z_neg" + +let neg x = + if is_small_int x && unsafe_to_int x <> min_int + then unsafe_of_int (- unsafe_to_int x) + else c_neg x + +external c_add: t -> t -> t = "ml_z_add" + +let add x y = + if is_small_int x && is_small_int y then begin + let z = unsafe_to_int x + unsafe_to_int y in + (* Overflow check -- Hacker's Delight, section 2.12 *) + if (z lxor unsafe_to_int x) land (z lxor unsafe_to_int y) >= 0 + then unsafe_of_int z + else c_add x y + end else + c_add x y + +external c_sub: t -> t -> t = "ml_z_sub" + +let sub x y = + if is_small_int x && is_small_int y then begin + let z = unsafe_to_int x - unsafe_to_int y in + (* Overflow check -- Hacker's Delight, section 2.12 *) + if (unsafe_to_int x lxor unsafe_to_int y) + land (z lxor unsafe_to_int x) >= 0 + then unsafe_of_int z + else c_sub x y + end else + c_sub x y + +external mul_overflows: int -> int -> bool = "ml_z_mul_overflows" @NOALLOC +external c_mul: t -> t -> t = "ml_z_mul" + +let mul x y = + if is_small_int x && is_small_int y + && not (mul_overflows (unsafe_to_int x) (unsafe_to_int y)) + then unsafe_of_int (unsafe_to_int x * unsafe_to_int y) + else c_mul x y + +external c_div: t -> t -> t = "ml_z_div" + +let div x y = + if is_small_int y then + if unsafe_to_int y = -1 then + neg x + else if is_small_int x then + unsafe_of_int (unsafe_to_int x / unsafe_to_int y) + else + c_div x y + else + c_div x y + external cdiv: t -> t -> t = "ml_z_cdiv" external fdiv: t -> t -> t = "ml_z_fdiv" -external rem: t -> t -> t = rem@ASM + +external c_rem: t -> t -> t = "ml_z_rem" + +let rem x y = + if is_small_int y then + if unsafe_to_int y = -1 then + unsafe_of_int 0 + else if is_small_int x then + unsafe_of_int (unsafe_to_int x mod unsafe_to_int y) + else + c_rem x y + else + c_rem x y + external div_rem: t -> t -> (t * t) = "ml_z_div_rem" -external succ: t -> t = succ@ASM -external pred: t -> t = pred@ASM -external abs: t -> t = abs@ASM -external logand: t -> t -> t = logand@ASM -external logor: t -> t -> t = logor@ASM -external logxor: t -> t -> t = logxor@ASM -external lognot: t -> t = lognot@ASM -external shift_left: t -> int -> t = shift_left@ASM -external shift_right: t -> int -> t = shift_right@ASM -external shift_right_trunc: t -> int -> t = shift_right_trunc@ASM + +external c_divexact: t -> t -> t = "ml_z_divexact" + +let divexact x y = + if is_small_int y then + if unsafe_to_int y = -1 then + neg x + else if is_small_int x then + unsafe_of_int (unsafe_to_int x / unsafe_to_int y) + else + c_divexact x y + else + c_divexact x y + +external c_succ: t -> t = "ml_z_succ" + +let succ x = + if is_small_int x && unsafe_to_int x <> max_int + then unsafe_of_int (unsafe_to_int x + 1) + else c_succ x + +external c_pred: t -> t = "ml_z_pred" + +let pred x = + if is_small_int x && unsafe_to_int x <> min_int + then unsafe_of_int (unsafe_to_int x - 1) + else c_pred x + +external c_abs: t -> t = "ml_z_abs" + +let abs x = + if is_small_int x then + if unsafe_to_int x >= 0 then x + else if unsafe_to_int x <> min_int then + unsafe_of_int (- unsafe_to_int x) + else + c_abs x + else + c_abs x + +external c_logand: t -> t -> t = "ml_z_logand" + +let logand x y = + if is_small_int x && is_small_int y + then unsafe_of_int (unsafe_to_int x land unsafe_to_int y) + else c_logand x y + +external c_logor: t -> t -> t = "ml_z_logor" + +let logor x y = + if is_small_int x && is_small_int y + then unsafe_of_int (unsafe_to_int x lor unsafe_to_int y) + else c_logor x y + +external c_logxor: t -> t -> t = "ml_z_logxor" + +let logxor x y = + if is_small_int x && is_small_int y + then unsafe_of_int (unsafe_to_int x lxor unsafe_to_int y) + else c_logxor x y + +external c_lognot: t -> t = "ml_z_lognot" + +let lognot x = + if is_small_int x + then unsafe_of_int (unsafe_to_int x lxor (-1)) + else c_lognot x + +external c_shift_left: t -> int -> t = "ml_z_shift_left" + +let shift_left x y = + if is_small_int x && y >= 0 && y < Sys.word_size then begin + let z = unsafe_to_int x lsl y in + if z asr y = unsafe_to_int x + then unsafe_of_int z + else c_shift_left x y + end else + c_shift_left x y + +external c_shift_right: t -> int -> t = "ml_z_shift_right" + +let shift_right x y = + if is_small_int x && y >= 0 then + unsafe_of_int + (unsafe_to_int x asr (if y < Sys.word_size then y else Sys.word_size - 1)) + else + c_shift_right x y + +external c_shift_right_trunc: t -> int -> t = "ml_z_shift_right_trunc" + +let shift_right_trunc x y = + if is_small_int x && y >= 0 then + if y >= Sys.word_size then + unsafe_of_int 0 + else if unsafe_to_int x >= 0 then + unsafe_of_int (unsafe_to_int x lsr y) + else + unsafe_of_int (- ((- unsafe_to_int x) lsr y)) + else + c_shift_right_trunc x y + external of_int: int -> t = "ml_z_of_int" @NOALLOC external of_int32: int32 -> t = "ml_z_of_int32" external of_int64: int64 -> t = "ml_z_of_int64" external of_nativeint: nativeint -> t = "ml_z_of_nativeint" external of_float: float -> t = "ml_z_of_float" -external to_int: t -> int = "ml_z_to_int" + +external c_to_int: t -> int = "ml_z_to_int" + +let to_int x = + if is_small_int x then unsafe_to_int x else c_to_int x + external to_int32: t -> int32 = "ml_z_to_int32" external to_int64: t -> int64 = "ml_z_to_int64" external to_nativeint: t -> nativeint = "ml_z_to_nativeint" @@ -75,7 +237,6 @@ external extract: t -> int -> int -> t = "ml_z_extract" external powm: t -> t -> t -> t = "ml_z_powm" external pow: t -> int -> t = "ml_z_pow" external powm_sec: t -> t -> t -> t = "ml_z_powm_sec" -external divexact: t -> t -> t = divexact@ASM external root: t -> int -> t = "ml_z_root" external rootrem: t -> int -> t * t = "ml_z_rootrem" external invert: t -> t -> t = "ml_z_invert" @@ -158,7 +319,7 @@ external testbit_internal: t -> int -> bool = "ml_z_testbit" @NOALLOC let testbit x n = if n >= 0 then testbit_internal x n else invalid_arg "Z.testbit" (* The test [n >= 0] is done in Caml rather than in the C stub code - so that the latter raises no exceptions and can be declared "noalloc". *) + so that the latter raises no exceptions and can be declared @NOALLOC. *) let is_odd x = testbit_internal x 0 let is_even x = not (testbit_internal x 0) @@ -219,22 +380,22 @@ let sprint () x = to_string x let bprint b x = Buffer.add_string b (to_string x) let pp_print f x = Format.pp_print_string f (to_string x) -external (~-): t -> t = neg@ASM +let (~-) = neg let (~+) x = x -external (+): t -> t -> t = add@ASM -external (-): t -> t -> t = sub@ASM -external ( * ): t -> t -> t = mul@ASM -external (/): t -> t -> t = div@ASM +let (+) = add +let (-) = sub +let ( * ) = mul +let (/) = div external (/>): t -> t -> t = "ml_z_cdiv" external (/<): t -> t -> t = "ml_z_fdiv" -external (/|): t -> t -> t = divexact@ASM -external (mod): t -> t -> t = rem@ASM -external (land): t -> t -> t = logand@ASM -external (lor): t -> t -> t = logor@ASM -external (lxor): t -> t -> t = logxor@ASM -external (~!): t -> t = lognot@ASM -external (lsl): t -> int -> t = shift_left@ASM -external (asr): t -> int -> t = shift_right@ASM +let (/|) = divexact +let (mod) = rem +let (land) = logand +let (lor) = logor +let (lxor) = logxor +let (~!) = lognot +let (lsl) = shift_left +let (asr) = shift_right external (~$): int -> t = "ml_z_of_int" @NOALLOC external ( ** ): t -> int -> t = "ml_z_pow" From 2933e9afcbefce28481a8dc54e2623d8fac71270 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Mon, 4 Jan 2021 12:53:20 +0100 Subject: [PATCH 3/4] Define of_int and ~$ as the %identity primitive This has been problematic with old versions of OCaml, but this should be fine since 4.04 (see OCaml PRs #555 and #600) and is certainly fine since 4.11 (see PR #9316). --- z.mlip | 4 ++-- z.mlp | 45 ++++++++++++++++++++++----------------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/z.mlip b/z.mlip index 69b5102..fe6dbca 100644 --- a/z.mlip +++ b/z.mlip @@ -65,7 +65,7 @@ val one: t val minus_one: t (** The number -1. *) -external of_int: int -> t = "ml_z_of_int" @NOALLOC +external of_int: int -> t = "%identity" (** Converts from a base integer. *) external of_int32: int32 -> t = "ml_z_of_int32" @@ -680,7 +680,7 @@ val (lsl): t -> int -> t val (asr): t -> int -> t (** Bit-wise shift to the right [shift_right]. *) -external (~$): int -> t = "ml_z_of_int" @NOALLOC +external (~$): int -> t = "%identity" (** Conversion from [int] [of_int]. *) diff --git a/z.mlp b/z.mlp index 8660f57..753299d 100644 --- a/z.mlp +++ b/z.mlp @@ -27,13 +27,13 @@ let _ = Callback.register_exception "ml_z_overflow" Overflow external is_small_int: t -> bool = "%obj_is_int" external unsafe_to_int: t -> int = "%identity" -external unsafe_of_int: int -> t = "%identity" +external of_int: int -> t = "%identity" external c_neg: t -> t = "ml_z_neg" let neg x = if is_small_int x && unsafe_to_int x <> min_int - then unsafe_of_int (- unsafe_to_int x) + then of_int (- unsafe_to_int x) else c_neg x external c_add: t -> t -> t = "ml_z_add" @@ -43,7 +43,7 @@ let add x y = let z = unsafe_to_int x + unsafe_to_int y in (* Overflow check -- Hacker's Delight, section 2.12 *) if (z lxor unsafe_to_int x) land (z lxor unsafe_to_int y) >= 0 - then unsafe_of_int z + then of_int z else c_add x y end else c_add x y @@ -56,7 +56,7 @@ let sub x y = (* Overflow check -- Hacker's Delight, section 2.12 *) if (unsafe_to_int x lxor unsafe_to_int y) land (z lxor unsafe_to_int x) >= 0 - then unsafe_of_int z + then of_int z else c_sub x y end else c_sub x y @@ -67,7 +67,7 @@ external c_mul: t -> t -> t = "ml_z_mul" let mul x y = if is_small_int x && is_small_int y && not (mul_overflows (unsafe_to_int x) (unsafe_to_int y)) - then unsafe_of_int (unsafe_to_int x * unsafe_to_int y) + then of_int (unsafe_to_int x * unsafe_to_int y) else c_mul x y external c_div: t -> t -> t = "ml_z_div" @@ -77,7 +77,7 @@ let div x y = if unsafe_to_int y = -1 then neg x else if is_small_int x then - unsafe_of_int (unsafe_to_int x / unsafe_to_int y) + of_int (unsafe_to_int x / unsafe_to_int y) else c_div x y else @@ -91,9 +91,9 @@ external c_rem: t -> t -> t = "ml_z_rem" let rem x y = if is_small_int y then if unsafe_to_int y = -1 then - unsafe_of_int 0 + of_int 0 else if is_small_int x then - unsafe_of_int (unsafe_to_int x mod unsafe_to_int y) + of_int (unsafe_to_int x mod unsafe_to_int y) else c_rem x y else @@ -108,7 +108,7 @@ let divexact x y = if unsafe_to_int y = -1 then neg x else if is_small_int x then - unsafe_of_int (unsafe_to_int x / unsafe_to_int y) + of_int (unsafe_to_int x / unsafe_to_int y) else c_divexact x y else @@ -118,14 +118,14 @@ external c_succ: t -> t = "ml_z_succ" let succ x = if is_small_int x && unsafe_to_int x <> max_int - then unsafe_of_int (unsafe_to_int x + 1) + then of_int (unsafe_to_int x + 1) else c_succ x external c_pred: t -> t = "ml_z_pred" let pred x = if is_small_int x && unsafe_to_int x <> min_int - then unsafe_of_int (unsafe_to_int x - 1) + then of_int (unsafe_to_int x - 1) else c_pred x external c_abs: t -> t = "ml_z_abs" @@ -134,7 +134,7 @@ let abs x = if is_small_int x then if unsafe_to_int x >= 0 then x else if unsafe_to_int x <> min_int then - unsafe_of_int (- unsafe_to_int x) + of_int (- unsafe_to_int x) else c_abs x else @@ -144,28 +144,28 @@ external c_logand: t -> t -> t = "ml_z_logand" let logand x y = if is_small_int x && is_small_int y - then unsafe_of_int (unsafe_to_int x land unsafe_to_int y) + then of_int (unsafe_to_int x land unsafe_to_int y) else c_logand x y external c_logor: t -> t -> t = "ml_z_logor" let logor x y = if is_small_int x && is_small_int y - then unsafe_of_int (unsafe_to_int x lor unsafe_to_int y) + then of_int (unsafe_to_int x lor unsafe_to_int y) else c_logor x y external c_logxor: t -> t -> t = "ml_z_logxor" let logxor x y = if is_small_int x && is_small_int y - then unsafe_of_int (unsafe_to_int x lxor unsafe_to_int y) + then of_int (unsafe_to_int x lxor unsafe_to_int y) else c_logxor x y external c_lognot: t -> t = "ml_z_lognot" let lognot x = if is_small_int x - then unsafe_of_int (unsafe_to_int x lxor (-1)) + then of_int (unsafe_to_int x lxor (-1)) else c_lognot x external c_shift_left: t -> int -> t = "ml_z_shift_left" @@ -174,7 +174,7 @@ let shift_left x y = if is_small_int x && y >= 0 && y < Sys.word_size then begin let z = unsafe_to_int x lsl y in if z asr y = unsafe_to_int x - then unsafe_of_int z + then of_int z else c_shift_left x y end else c_shift_left x y @@ -183,7 +183,7 @@ external c_shift_right: t -> int -> t = "ml_z_shift_right" let shift_right x y = if is_small_int x && y >= 0 then - unsafe_of_int + of_int (unsafe_to_int x asr (if y < Sys.word_size then y else Sys.word_size - 1)) else c_shift_right x y @@ -193,15 +193,14 @@ external c_shift_right_trunc: t -> int -> t = "ml_z_shift_right_trunc" let shift_right_trunc x y = if is_small_int x && y >= 0 then if y >= Sys.word_size then - unsafe_of_int 0 + of_int 0 else if unsafe_to_int x >= 0 then - unsafe_of_int (unsafe_to_int x lsr y) + of_int (unsafe_to_int x lsr y) else - unsafe_of_int (- ((- unsafe_to_int x) lsr y)) + of_int (- ((- unsafe_to_int x) lsr y)) else c_shift_right_trunc x y -external of_int: int -> t = "ml_z_of_int" @NOALLOC external of_int32: int32 -> t = "ml_z_of_int32" external of_int64: int64 -> t = "ml_z_of_int64" external of_nativeint: nativeint -> t = "ml_z_of_nativeint" @@ -396,7 +395,7 @@ let (lxor) = logxor let (~!) = lognot let (lsl) = shift_left let (asr) = shift_right -external (~$): int -> t = "ml_z_of_int" @NOALLOC +external (~$): int -> t = "%identity" external ( ** ): t -> int -> t = "ml_z_pow" module Compare = struct From d8b77d53ab6a25f5d27ebfec65309ad0b087b597 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Mon, 4 Jan 2021 12:53:20 +0100 Subject: [PATCH 4/4] Update .opam file Minimum OCaml version is 4.04.0 (to avoid bug with %identity used for Z.of_int). ocamlfind is a use-time dependency, not just a build-time dependency. --- zarith.opam | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarith.opam b/zarith.opam index 8eedde4..8a4e3d2 100644 --- a/zarith.opam +++ b/zarith.opam @@ -27,8 +27,8 @@ install: [ [make "install"] ] depends: [ - "ocaml" - "ocamlfind" {build} + "ocaml" {>= "4.04.0"} + "ocamlfind" "conf-gmp" "conf-perl" {build} ]