From 973de044b10a389a45c68b4002a696e99b8874a7 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 4 Jan 2021 12:53:20 +0100
Subject: [PATCH 1/4] Remove install_frame_tables

This was empty code.
---
 caml_z.c | 6 ------
 z.mlp    | 3 ---
 2 files changed, 9 deletions(-)

diff --git a/caml_z.c b/caml_z.c
index f34c55c..9867de7 100644
--- a/caml_z.c
+++ b/caml_z.c
@@ -3367,12 +3367,6 @@ static void ml_z_dump_count()
 }
 #endif
 
-CAMLprim value ml_z_install_frametable()
-{
-  /* nothing to do for bytecode version */
-  return Val_unit;
-}
-
 CAMLprim value ml_z_init()
 {
   ml_z_2p32 = ldexp(1., 32);
diff --git a/z.mlp b/z.mlp
index 09377ab..1e979d4 100644
--- a/z.mlp
+++ b/z.mlp
@@ -23,9 +23,6 @@ exception Overflow
 external init: unit -> unit = "ml_z_init"
 let _ = init ()
 
-external install_frametable: unit -> unit = install_frametable@ASM
-let _ =  install_frametable ()
-
 let _ = Callback.register_exception "ml_z_overflow" Overflow
 
 external neg: t -> t = neg@ASM

From f9a3d0d911c9662952c393b853b3cb2ca3cdd32c Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 4 Jan 2021 12:56:32 +0100
Subject: [PATCH 2/4] Program the fast paths in OCaml instead of in asm or in C

Benefits:

- The fast paths no longer incurs the overhead of caml_c_call.
  Hence, even though the OCaml fast-path code has more instructions
  than the hand-coded asm code, Z arithmetic gets faster on
  small integers.

- For multiplication, the test for overflow cannot be efficiently
  implemented in OCaml.  We call a C function of the "noalloc" kind,
  again avoiding caml_c_call.  The C function uses the
   __builtin_mul_overflow from GCC >=5 and from Clang if available,
  or the _mul128 intrinsic from MSVC.
  Again, this executes more instructions than the hand-coded asm code,
  but we still get comparable performance.

- We get rid of the asm files, which were not small and painful to
  adapt to multiple ABIs for the same processor.
---
 caml_z.c                | 112 ++++++++---
 caml_z_arm.S            | 265 --------------------------
 caml_z_i686.S           | 406 ----------------------------------------
 caml_z_x86_64.S         | 396 ---------------------------------------
 caml_z_x86_64_mingw64.S | 381 -------------------------------------
 z.mlip                  |  79 ++++----
 z.mlp                   | 225 ++++++++++++++++++----
 7 files changed, 321 insertions(+), 1543 deletions(-)
 delete mode 100644 caml_z_arm.S
 delete mode 100644 caml_z_i686.S
 delete mode 100644 caml_z_x86_64.S
 delete mode 100644 caml_z_x86_64_mingw64.S

diff --git a/caml_z.c b/caml_z.c
index 9867de7..da0957d 100644
--- a/caml_z.c
+++ b/caml_z.c
@@ -58,6 +58,14 @@ extern "C" {
 
 #ifdef _MSC_VER
 #include <float.h>
+#include <intrin.h>
+#endif
+
+/* The "__has_builtin" special macro from Clang */
+#ifdef __has_builtin
+#define HAS_BUILTIN(x) __has_builtin(x)
+#else
+#define HAS_BUILTIN(x) 0
 #endif
 
 /*---------------------------------------------------
@@ -71,6 +79,14 @@ extern "C" {
 #define Z_FAST_PATH  1
 #define Z_USE_NATINT 1
 
+/* Whether the fast path (arguments and result are small integers)
+   has already be handled in OCaml, so that there is no need to
+   re-test for it in C functions.
+   Applies to: neg, abs, add, sub, mul, div, rem, succ, pred,
+   logand, logor, logxor, lognot, shifts, divexact.
+*/
+#define Z_FAST_PATH_IN_OCAML 1
+
 /* Sanity checks. */
 #define Z_PERFORM_CHECK 0
 
@@ -1267,7 +1283,7 @@ CAMLprim value ml_z_neg(value arg)
 {
   Z_MARK_OP;
   Z_CHECK(arg);
-#if Z_FAST_PATH && !defined(Z_ASM_neg)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg)) {
     /* fast path */
     if (arg > Val_long(Z_MIN_INT)) return 2 - arg;
@@ -1293,7 +1309,7 @@ CAMLprim value ml_z_abs(value arg)
 {
   Z_MARK_OP;
   Z_CHECK(arg);
-#if Z_FAST_PATH && !defined(Z_ASM_abs)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg)) {
     /* fast path */
     if (arg >= Val_long(0)) return arg;
@@ -1402,7 +1418,7 @@ CAMLprim value ml_z_add(value arg1, value arg2)
 {
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_add)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     intnat a1 = Long_val(arg1);
@@ -1420,7 +1436,7 @@ CAMLprim value ml_z_sub(value arg1, value arg2)
 {
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_sub)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     intnat a1 = Long_val(arg1);
@@ -1434,19 +1450,67 @@ CAMLprim value ml_z_sub(value arg1, value arg2)
   return ml_z_addsub(arg1, arg2, Z_SIGN_MASK);
 }
 
+CAMLprim value ml_z_mul_overflows(value vx, value vy)
+{
+#if HAS_BUILTIN(__builtin_mul_overflow) || __GNUC__ >= 5
+  intnat z;
+  return Val_bool(__builtin_mul_overflow(vx - 1, vy >> 1, &z));
+#elif defined(__GNUC__) && defined(__x86_64__)
+  intnat z;
+  unsigned char o;
+  asm("imulq %1, %3; seto %0"
+      : "=q" (o), "=r" (z)
+      : "1" (vx - 1), "r" (vy >> 1)
+      : "cc");
+  return Val_int(o);
+#elif defined(_MSC_VER) && defined(_M_X64)
+  intnat hi, lo;
+  lo = _mul128(vx - 1, vy >> 1, &hi);
+  return Val_bool(hi != lo >> 63);
+#else
+  /* Portable C code */
+  intnat x = Long_val(vx);
+  intnat y = Long_val(vy);
+  /* Quick approximate check for small values of x and y.
+     Also catches the cases x = 0, x = 1, y = 0, y = 1. */
+  if (Z_FITS_HINT(x)) {
+    if (Z_FITS_HINT(y)) return Val_false;
+    if ((uintnat) x <= 1) return Val_false;
+  }
+  if ((uintnat) y <= 1) return Val_false;
+#if 1
+  /* Give up at this point; we'll go through the general case in ml_z_mul */
+  return Val_true;
+#else
+  /* The product x*y is representable as an unboxed integer if
+     it is in [Z_MIN_INT, Z_MAX_INT].
+     x >= 0 y >= 0:  x*y >= 0 and x*y <= Z_MAX_INT <-> y <= Z_MAX_INT / x
+     x < 0  y >= 0:  x*y <= 0 and x*y >= Z_MIN_INT <-> x >= Z_MIN_INT / y
+     x >= 0 y < 0 :  x*y <= 0 and x*y >= Z_MIN_INT <-> y >= Z_MIN_INT / x
+     x < 0  y < 0 :  x*y >= 0 and x*y <= Z_MAX_INT <-> x >= Z_MAX_INT / y */
+  if (x >= 0)
+    if (y >= 0)
+      return Val_bool(y > Z_MAX_INT / x);
+    else
+      return Val_bool(y < Z_MIN_INT / x);
+  else
+    if (y >= 0)
+      return Val_bool(x < Z_MIN_INT / y);
+    else
+      return Val_bool(x < Z_MAX_INT / y);
+#endif
+#endif
+}
+
 CAMLprim value ml_z_mul(value arg1, value arg2)
 {
   Z_DECL(arg1); Z_DECL(arg2);
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_mul)
-  if (Is_long(arg1) && Is_long(arg2)) {
-    /* fast path */
-    intnat a1 = Long_val(arg1);
-    intnat a2 = Long_val(arg2);
-    if (!a1 || !a2) return Val_long(0);
-    /* small argument case */
-    if (Z_FITS_HINT(arg1) && Z_FITS_HINT(arg2)) return Val_long(a1 * a2);
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
+  if (Is_long(arg1) && Is_long(arg2) &&
+      ml_z_mul_overflows(arg1, arg2) == Val_false) {
+    return Val_long(Long_val(a1) * Long_val(a2));
   }
 #endif
   /* mpn_ version */
@@ -1554,7 +1618,7 @@ CAMLprim value ml_z_div(value arg1, value arg2)
 {
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_div)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     intnat a1 = Long_val(arg1);
@@ -1574,7 +1638,7 @@ CAMLprim value ml_z_rem(value arg1, value arg2)
 {
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_rem)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     intnat a1 = Long_val(arg1);
@@ -1706,7 +1770,7 @@ CAMLprim value ml_z_succ(value arg)
 {
   Z_MARK_OP;
   Z_CHECK(arg);
-#if Z_FAST_PATH && !defined(Z_ASM_succ)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg)) {
     /* fast path */
     if (arg < Val_long(Z_MAX_INT)) return arg + 2;
@@ -1721,7 +1785,7 @@ CAMLprim value ml_z_pred(value arg)
 {
   Z_MARK_OP;
   Z_CHECK(arg);
-#if Z_FAST_PATH && !defined(Z_ASM_pred)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg)) {
     /* fast path */
      if (arg > Val_long(Z_MIN_INT)) return arg - 2;
@@ -1929,7 +1993,7 @@ CAMLprim value ml_z_logand(value arg1, value arg2)
 {
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_logand)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     return arg1 & arg2;
@@ -2011,7 +2075,7 @@ CAMLprim value ml_z_logor(value arg1, value arg2)
 {
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_logor)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     return arg1 | arg2;
@@ -2097,7 +2161,7 @@ CAMLprim value ml_z_logxor(value arg1, value arg2)
 {
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH && !defined(Z_ASM_logxor)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     return (arg1 ^ arg2) | 1;
@@ -2183,7 +2247,7 @@ CAMLprim value ml_z_lognot(value arg)
 {
   Z_MARK_OP;
   Z_CHECK(arg);
-#if Z_FAST_PATH && !defined(Z_ASM_lognot)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg)) {
     /* fast path */
     return (~arg) | 1;
@@ -2232,7 +2296,7 @@ CAMLprim value ml_z_shift_left(value arg, value count)
   if (!c) return arg;
   c1 = c / Z_LIMB_BITS;
   c2 = c % Z_LIMB_BITS;
-#if Z_FAST_PATH && !defined(Z_ASM_shift_left)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg) && !c1) {
     /* fast path */
     value a = arg - 1;
@@ -2281,7 +2345,7 @@ CAMLprim value ml_z_shift_right(value arg, value count)
   if (!c) return arg;
   c1 = c / Z_LIMB_BITS;
   c2 = c % Z_LIMB_BITS;
-#if Z_FAST_PATH && !defined(Z_ASM_shift_right)
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg)) {
     /* fast path */
     if (c1) {
@@ -2342,7 +2406,7 @@ CAMLprim value ml_z_shift_right_trunc(value arg, value count)
   if (!c) return arg;
   c1 = c / Z_LIMB_BITS;
   c2 = c % Z_LIMB_BITS;
-#if Z_FAST_PATH
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg)) {
     /* fast path */
     if (c1) return Val_long(0);
@@ -2661,7 +2725,7 @@ CAMLprim value ml_z_divexact(value arg1, value arg2)
   Z_DECL(arg1); Z_DECL(arg2);
   Z_MARK_OP;
   Z_CHECK(arg1); Z_CHECK(arg2);
-#if Z_FAST_PATH
+#if Z_FAST_PATH && !Z_FAST_PATH_IN_OCAML
   if (Is_long(arg1) && Is_long(arg2)) {
     /* fast path */
     intnat a1 = Long_val(arg1);
diff --git a/caml_z_arm.S b/caml_z_arm.S
deleted file mode 100644
index 9c88451..0000000
--- a/caml_z_arm.S
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
-   Assembly version for the fast path of some functions in Z:
-   - ARM v5M and above target
-   - System 5 ABI and assembly syntax
-   - GNU as
-
-
-   This file is part of the Zarith library
-   http://forge.ocamlcore.org/projects/zarith .
-   It is distributed under LGPL 2 licensing, with static linking exception.
-   See the LICENSE file included in the distribution.
-
-   Copyright (c) 2013 Xavier Leroy, INRIA Paris-Rocquencourt,
-   and Antoine Miné, Abstraction project.
-   Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS),
-   a joint laboratory by:
-   CNRS (Centre national de la recherche scientifique, France),
-   ENS (École normale supérieure, Paris, France),
-   INRIA Rocquencourt (Institut national de recherche en informatique, France).
-
- */
-
-
-        /* makes the stack non-executable. */
-        .section .note.GNU-stack,"",%progbits
-
-
-        /* helper functions */
-        /* **************** */
-
-
-        /* dot prefix for local labels */
-
-#define L(x) .L##x
-	
-        /* function prolog & epilog */
-
-#define PROLOG(proc) \
-        .text;   \
-        .global ml_as_z_##proc; \
-        .type   ml_as_z_##proc, %function; \
-ml_as_z_##proc:
-
-#define EPILOG(proc) \
-        .size   ml_as_z_##proc, . - ml_as_z_##proc
-
-        /* calling C functions */
-
-#define C_JMP(proc) \
-        b     ml_z_##proc(PLT)
-
-        /* operation counter (not implemented) */
-
-#define OP
-
-        /* unary arithmetics */
-        /* ***************** */
-
-        /* neg */
-        PROLOG(neg)
-L(negenter):	
-	tst     r0, #1
-        beq     L(neg)
-        rsbs    r1, r0, #2      /* r1 = 2 - r0 */
-        bvs     L(neg)
-        mov     r0, r1
-        OP
-        bx      lr
-L(neg):
-        C_JMP(neg)
-        EPILOG(neg)
-
-
-        /* abs */
-        PROLOG(abs)
-	tst     r0, #1
-        beq     L(abs)
-        cmp     r0, #0
-        bge     L(abs2)
-        rsbs    r1, r0, #2      /* r1 = 2 - r0 */
-        bvs     L(abs)
-        mov     r0, r1
-L(abs2):
-        OP
-        bx      lr
-L(abs):
-        C_JMP(abs)
-        EPILOG(abs)
-
-
-        /* succ */
-        PROLOG(succ)
-        tst     r0, #1
-        beq     L(succ)
-        adds    r1, r0, #2
-        bvs     L(succ)
-        mov     r0, r1
-        OP
-        bx	lr
-L(succ):
-        C_JMP(succ)
-        EPILOG(succ)
-
-
-        /* pred */
-        PROLOG(pred)
-        tst     r0, #1
-        beq     L(pred)
-        subs    r1, r0, #2
-        bvs     L(pred)
-        mov     r0, r1
-        OP
-        bx	lr
-L(pred):
-        C_JMP(pred)
-        EPILOG(pred)
-
-
-
-
-        /* binary arithmetics */
-        /* ****************** */
-
-
-        /* add */
-        PROLOG(add)
-	and     r2, r0, r1
-        tst     r2, #1
-        beq     L(add)
-        sub     r2, r0, #1
-        adds    r2, r2, r1
-        bvs     L(add)
-	mov     r0, r2
-        OP
-        bx	lr
-L(add):
-        C_JMP(add)
-        EPILOG(add)
-
-
-        /* sub */
-        PROLOG(sub)
-	and     r2, r0, r1
-        tst     r2, #1
-        beq     L(sub)
-	subs    r2, r0, r1
-        bvs     L(sub)
-	add     r0, r2, #1
-        OP
-        bx	lr
-L(sub):
-        C_JMP(sub)
-        EPILOG(sub)
-
-
-        /* mul */
-        PROLOG(mul)
-	and     r2, r0, r1
-        tst     r2, #1
-        beq     L(mul)
-        sub     r2, r0, #1
-        mov     r3, r1, asr #1
-        smull   r3, r12, r2, r3  /* r3 = low half of product, r12 = high half */
-        cmp     r12, r3, asr #31 /* high half must equal sign-ext of low half */
-        bne     L(mul)           /* otherwise, overflow occurred */
-        add     r0, r3, #1
-        OP
-        bx	lr
-L(mul):
-        C_JMP(mul)
-        EPILOG(mul)
-
-        /* bit operations */
-        /* ************** */
-
-
-        /* not */
-        PROLOG(lognot)
-	tst     r0, #1
-        beq     L(lognot)
-        sub     r0, r0, #1
-        mvn     r0, r0
-        OP
-        bx	lr
-L(lognot):
-        C_JMP(lognot)
-        EPILOG(lognot)
-
-
-        /* and */
-        PROLOG(logand)
-	and     r2, r0, r1
-        tst     r2, #1
-        beq     L(logand)
-        mov     r0, r2
-        OP
-        bx	lr
-L(logand):
-        C_JMP(logand)
-        EPILOG(logand)
-
-
-         /* or */
-        PROLOG(logor)
-	and     r2, r0, r1
-        tst     r2, #1
-        beq     L(logor)
-        orr     r0, r0, r1
-        OP
-        bx	lr
-L(logor):
-        C_JMP(logor)
-        EPILOG(logor)
-
-
-         /* xor */
-        PROLOG(logxor)
-	and     r2, r0, r1
-        tst     r2, #1
-        beq     L(logxor)
-        eor     r0, r0, r1
-        orr     r0, r0, #1
-        OP
-        bx	lr
-L(logxor):
-        C_JMP(logxor)
-        EPILOG(logxor)
-
-
-         /* shift_left */
-        PROLOG(shift_left)
-	tst     r0, #1
-        beq     L(shift_left)
-        cmp     r1, #63        /* 31 in 2n+1 encoding */
-        bhs     L(shift_left)
-        mov     r3, r1, asr #1
-	sub     r2, r0, #1
-        mov     r12, r2, lsl r3
-        cmp     r2, r12, asr r3
-        bne     L(shift_left)  /* overflow occurred */
-        orr     r0, r12, #1
-        OP
-        bx	lr
-L(shift_left):
-        C_JMP(shift_left)
-        EPILOG(shift_left)
-
-
-        /* shift_right */
-        PROLOG(shift_right)
-        tst     r0, #1
-        beq     L(shift_right)
-        movs    r2, r1, asr #1
-        bmi     L(shift_right) /* if shift amount < 0, go to slow path */
-        cmp     r2, #31
-        movlo   r0, r0, asr r2 /* if shift amount < 31, shift by this amount */
-        movhs   r0, r0, asr #31 /* if shift amount >= 31, shift by 31 */
-	orr     r0, r0, #1
-        OP
-        bx	lr
-L(shift_right):
-        C_JMP(shift_right)
-        EPILOG(shift_right)
-
-
diff --git a/caml_z_i686.S b/caml_z_i686.S
deleted file mode 100644
index 83321db..0000000
--- a/caml_z_i686.S
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
-   Assembly version for the fast path of some functions in Z:
-   - x86 target
-   - System 5 ABI and assembly syntax
-   - GNU as
-
-
-   This file is part of the Zarith library
-   http://forge.ocamlcore.org/projects/zarith .
-   It is distributed under LGPL 2 licensing, with static linking exception.
-   See the LICENSE file included in the distribution.
-
-   Copyright (c) 2010-2011 Antoine Miné, Abstraction project.
-   Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS),
-   a joint laboratory by:
-   CNRS (Centre national de la recherche scientifique, France),
-   ENS (École normale supérieure, Paris, France),
-   INRIA Rocquencourt (Institut national de recherche en informatique, France).
-
- */
-
-
-        /* makes the stack non-executable. */
-#ifdef Z_ELF
-        .section .note.GNU-stack,"",@progbits
-#endif
-
-        /* helper functions */
-        /* **************** */
-
-
-        /* optional underscope prefix for symbols */
-#ifdef Z_UNDERSCORE_PREFIX
-#define SYMB(x) _##x
-#else
-#define SYMB(x)  x
-#endif
-
-	
-        /* optional dot prefix for local labels */
-#ifdef Z_DOT_LABEL_PREFIX
-#define L(x) .L##x
-#else
-#define L(x)  L##x
-#endif
-	
-        /* function prolog & epilog */
-
-#if defined(Z_ELF) || defined(Z_COFF)
-#define FUNCTION_ALIGN 16
-#endif
-#if defined(Z_MACOS)
-#define FUNCTION_ALIGN 4
-#endif
-
-#if defined(Z_ELF)
-#define PROLOG(proc) \
-        .text;   \
-        .globl  SYMB(ml_as_z_##proc); \
-        .type   SYMB(ml_as_z_##proc), @function; \
-        .align  FUNCTION_ALIGN; \
-SYMB(ml_as_z_##proc):
-
-#define EPILOG(proc) \
-        .size   SYMB(ml_as_z_##proc), .-SYMB(ml_as_z_##proc)
-#endif
-
-#if defined(Z_COFF) || defined(Z_MACOS)
-#define PROLOG(proc) \
-        .text;   \
-        .globl  SYMB(ml_as_z_##proc); \
-        .align  FUNCTION_ALIGN; \
-SYMB(ml_as_z_##proc):
-
-#define EPILOG(proc)
-#endif
-
-        /* calling C functions */
-
-#define C_JMP(proc) \
-        jmp     SYMB(ml_z_##proc)
-
-
-        /* operation counter */
-
-#ifdef Z_PERF_COUNTER
-#define OP \
-        incl    SYMB(ml_z_ops_as)
-#else
-#define OP
-#endif
-
-
-
-        /* unary arithmetics */
-        /* ***************** */
-
-        /* neg */
-        PROLOG(neg)
-L(negenter):
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(neg)
-        not     %eax
-        add     $3, %eax
-        jo      L(neg)
-        OP
-        ret
-L(neg):
-        C_JMP(neg)
-        EPILOG(neg)
-
-
-        /* abs */
-        PROLOG(abs)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(abs)
-        test    %eax, %eax
-        jns     L(abs2)
-        not     %eax
-        add     $3, %eax
-        jo      L(abs)
-L(abs2):
-        OP
-        ret
-L(abs):
-        C_JMP(abs)
-        EPILOG(abs)
-
-
-        /* succ */
-        PROLOG(succ)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(succ)
-        add     $2, %eax
-        jo      L(succ)
-        OP
-        ret
-L(succ):
-        C_JMP(succ)
-        EPILOG(succ)
-
-
-        /* pred */
-        PROLOG(pred)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(pred)
-        sub     $2, %eax
-        jo      L(pred)
-        OP
-        ret
-L(pred):
-        C_JMP(pred)
-        EPILOG(pred)
-
-
-
-
-        /* binary arithmetics */
-        /* ****************** */
-
-        /* add */
-        PROLOG(add)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(add)
-        mov     8(%esp), %ecx
-        test    $1, %cl
-        jz      L(add)
-        dec     %eax
-        add     %ecx, %eax
-        jo      L(add)
-        OP
-        ret
-L(add):
-        C_JMP(add)
-        EPILOG(add)
-
-
-        /* sub */
-        PROLOG(sub)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(sub)
-        mov     8(%esp), %ecx
-        test    $1, %cl
-        jz      L(sub)
-        sub     %ecx, %eax
-        jo      L(sub)
-        inc     %eax
-        OP
-        ret
-L(sub):
-        C_JMP(sub)
-        EPILOG(sub)
-
-
-        /* mul */
-        PROLOG(mul)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(mul)
-        mov     8(%esp), %ecx
-        sar     %ecx  /* sets CF to least significant bit */
-        jnc     L(mul)
-        dec     %eax
-        imul    %ecx, %eax
-        jo      L(mul)
-        inc     %eax
-        OP
-        ret
-L(mul):
-        C_JMP(mul)
-        EPILOG(mul)
-
-
-        /* div */
-        PROLOG(div)
-        mov     8(%esp), %ecx
-        sar     %ecx
-        jnc     L(div)   /* not a 31-bit integer */
-        jz      L(div)   /* division by zero */
-        cmp     $-1, %ecx
-	     /* division by -1, the only one that can overflow */
-        je      L(negenter)
-        mov     4(%esp), %eax
-        sar     %eax
-        jnc     L(div)   /* not a 31-bit integer */
-        cdq
-        idiv    %ecx
-	lea	1(%eax, %eax), %eax
-        OP
-        ret
-L(div):
-        C_JMP(div)
-        EPILOG(div)
-
-
-        /* divexacty */
-        PROLOG(divexact)
-        mov     8(%esp), %ecx
-        sar     %ecx
-        jnc     L(divexact)   /* not a 31-bit integer */
-        jz      L(divexact)   /* division by zero */
-        cmp     $-1, %ecx
-	     /* division by -1, the only one that can overflow */
-        je      L(negenter)
-        mov     4(%esp), %eax
-        sar     %eax
-        jnc     L(divexact)   /* not a 31-bit integer */
-        cdq
-        idiv    %ecx
-	lea	1(%eax, %eax), %eax
-        OP
-        ret
-L(divexact):
-        C_JMP(divexact)
-        EPILOG(divexact)
-
-
-        /* rem */
-        PROLOG(rem)
-        mov     4(%esp), %eax
-        sar     %eax
-        jnc     L(rem)   /* not a 31-bit integer */
-        mov     8(%esp), %ecx
-        sar     %ecx
-        jnc     L(rem)   /* not a 31-bit integer */
-        jz      L(rem)   /* division by zero */
-        cmp     $-1, %ecx
-        je      L(remneg)
-        cdq
-        idiv    %ecx
-	lea     1(%edx, %edx), %eax
-        OP
-        ret
-L(remneg):
-        /* division by -1 */
-        mov     $1, %eax
-        OP
-        ret
-L(rem):
-        C_JMP(rem)
-        EPILOG(rem)
-
-
-        /* bit operations */
-        /* ************** */
-
-
-        /* not */
-        PROLOG(lognot)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(lognot)
-        dec     %eax
-        not     %eax
-        ret
-L(lognot):
-        C_JMP(lognot)
-        EPILOG(lognot)
-
-
-        /* or */
-        PROLOG(logor)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(logor)
-        mov     8(%esp), %ecx
-        test    $1, %cl
-        jz      L(logor)
-        or      %ecx, %eax
-        OP
-        ret
-L(logor):
-        C_JMP(logor)
-        EPILOG(logor)
-
-        /* and */
-        PROLOG(logand)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(logand)
-        mov     8(%esp), %ecx
-        test    $1, %cl
-        jz      L(logand)
-        and     %ecx, %eax
-        OP
-        ret
-L(logand):
-        C_JMP(logand)
-        EPILOG(logand)
-
-
-        /* xor */
-        PROLOG(logxor)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(logxor)
-        mov     8(%esp), %ecx
-        test    $1, %cl
-        jz      L(logxor)
-        xor     %ecx, %eax
-        inc     %eax
-        OP
-        ret
-L(logxor):
-        C_JMP(logxor)
-        EPILOG(logxor)
-
-
-        /* shift_left */
-        PROLOG(shift_left)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(shift_left)
-        mov     8(%esp), %ecx
-        sar     %ecx
-        cmp     $31, %ecx
-        jae     L(shift_left)
-        dec     %eax
-        sal     %cl, %eax
-        mov     %eax, %edx
-        sar     %cl, %edx
-        inc     %edx
-        cmp     4(%esp), %edx
-        jne     L(shift_left) /* overflow */
-        inc     %eax
-        OP
-        ret
-L(shift_left):
-        C_JMP(shift_left)
-        EPILOG(shift_left)
-
-
-        /* shift_right */
-        PROLOG(shift_right)
-        mov     4(%esp), %eax
-        test    $1, %al
-        jz      L(shift_right)
-        mov     8(%esp), %ecx
-        sar     %ecx
-        js      L(shift_right)
-        cmp     $31, %ecx
-        jae     L(shift_right2)
-        sar     %cl, %eax
-        or      $1, %eax
-        OP
-        ret
-L(shift_right2):
-        /* shift by 31 or more */
-        test    %eax, %eax
-        js      L(shift_right3)
-        mov     $1, %eax
-        OP
-        ret
-L(shift_right3):
-        mov     $-1, %eax
-        OP
-        ret
-L(shift_right):
-        C_JMP(shift_right)
-        EPILOG(shift_right)
diff --git a/caml_z_x86_64.S b/caml_z_x86_64.S
deleted file mode 100644
index 13ebbee..0000000
--- a/caml_z_x86_64.S
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
-   Assembly version for the fast path of some functions in Z:
-   - x86_64 target
-   - System 5 ABI and assembly syntax
-   - GNU as
-
-
-   This file is part of the Zarith library
-   http://forge.ocamlcore.org/projects/zarith .
-   It is distributed under LGPL 2 licensing, with static linking exception.
-   See the LICENSE file included in the distribution.
-
-   Copyright (c) 2010-2011 Antoine Miné, Abstraction project.
-   Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS),
-   a joint laboratory by:
-   CNRS (Centre national de la recherche scientifique, France),
-   ENS (École normale supérieure, Paris, France),
-   INRIA Rocquencourt (Institut national de recherche en informatique, France).
-
- */
-
-
-        /* makes the stack non-executable. */
-#ifdef Z_ELF
-        .section .note.GNU-stack,"",@progbits
-#endif
-
-        /* helper functions */
-        /* **************** */
-
-
-        /* optional underscope prefix for symbols */
-#ifdef Z_UNDERSCORE_PREFIX
-#define SYMB(x) _##x
-#else
-#define SYMB(x)  x
-#endif
-
-        /* optional dot prefix for local labels */
-#ifdef Z_DOT_LABEL_PREFIX
-#define L(x) .L##x
-#else
-#define L(x)  L##x
-#endif
-
-        /* function prolog & epilog */
-
-#if defined(Z_ELF)
-#define FUNCTION_ALIGN 16
-#endif
-#if defined(Z_MACOS)
-#define FUNCTION_ALIGN 4
-#endif
-
-#if defined(Z_ELF)
-#define PROLOG(proc) \
-        .text;   \
-        .globl  SYMB(ml_as_z_##proc); \
-        .type   SYMB(ml_as_z_##proc), @function; \
-        .align  FUNCTION_ALIGN; \
-SYMB(ml_as_z_##proc):
-
-#define EPILOG(proc) \
-        .size   SYMB(ml_as_z_##proc), .-SYMB(ml_as_z_##proc)
-#endif
-
-#if defined(Z_MACOS)
-#define PROLOG(proc) \
-        .text;   \
-        .globl  SYMB(ml_as_z_##proc); \
-        .align  FUNCTION_ALIGN; \
-SYMB(ml_as_z_##proc):
-
-#define EPILOG(proc)
-
-#endif
-
-        /* calling C functions */
-
-#if defined(Z_ELF)
-#define C_JMP(proc) \
-        jmp     SYMB(ml_z_##proc@PLT)
-#endif
-
-#if defined(Z_MACOS)
-#define C_JMP(proc) \
-        jmp     SYMB(ml_z_##proc)
-#endif
-
-        /* operation counter */
-
-#ifndef Z_PERF_COUNTER
-#define OP
-#else
-#if defined(Z_ELF) || defined(Z_MACOS)
-#define OP \
-        mov     SYMB(ml_z_ops_as@GOTPCREL(%rip)), %rcx; \
-        addq    $1, (%rcx)
-#endif
-#endif
-
-        /* unary arithmetics */
-        /* ***************** */
-
-        /* neg */
-        PROLOG(neg)
-L(negenter):	
-        test    $1, %dil
-        jz      L(neg)
-        mov     %rdi, %rax
-        not     %rax
-        add     $3, %rax
-        jo      L(neg)
-        OP
-        ret
-L(neg):
-        C_JMP(neg)
-        EPILOG(neg)
-
-
-        /* abs */
-        PROLOG(abs)
-        test    $1, %dil
-        jz      L(abs)
-        mov     %rdi, %rax
-        test    %rdi, %rdi
-        jns     L(abs2)
-        not     %rax
-        add     $3, %rax
-        jo      L(neg)
-L(abs2):
-        OP
-        ret
-L(abs):
-        C_JMP(abs)
-        EPILOG(abs)
-
-
-        /* succ */
-        PROLOG(succ)
-        test    $1, %dil
-        jz      L(succ)
-        mov     %rdi, %rax
-        add     $2, %rax
-        jo      L(succ)
-        OP
-        ret
-L(succ):
-        C_JMP(succ)
-        EPILOG(succ)
-
-
-        /* pred */
-        PROLOG(pred)
-        test    $1, %dil
-        jz      L(pred)
-        mov     %rdi, %rax
-        sub     $2, %rax
-        jo      L(pred)
-        OP
-        ret
-L(pred):
-        C_JMP(pred)
-        EPILOG(pred)
-
-
-
-
-        /* binary arithmetics */
-        /* ****************** */
-
-
-        /* add */
-        PROLOG(add)
-        test    $1, %dil
-        jz      L(add)
-        test    $1, %sil
-        jz      L(add)
-        lea     -1(%rdi), %rax
-        add     %rsi, %rax
-        jo      L(add)
-        OP
-        ret
-L(add):
-        C_JMP(add)
-        EPILOG(add)
-
-
-        /* sub */
-        PROLOG(sub)
-        test    $1, %dil
-        jz      L(sub)
-        test    $1, %sil
-        jz      L(sub)
-        mov     %rdi, %rax
-        sub     %rsi, %rax
-        jo      L(sub)
-        inc     %rax
-        OP
-        ret
-L(sub):
-        C_JMP(sub)
-        EPILOG(sub)
-
-
-        /* mul */
-        PROLOG(mul)
-        test    $1, %dil
-        jz      L(mul)
-        mov     %rsi, %rcx
-        sar     %rcx
-        jnc     L(mul)
-        lea     -1(%rdi), %rax
-        imul    %rcx, %rax
-        jo      L(mul)
-        inc     %rax
-        OP
-        ret
-L(mul):
-        C_JMP(mul)
-        EPILOG(mul)
-
-
-        /* div */
-        PROLOG(div)
-        mov     %rsi, %rcx
-        cmp     $-1, %rsi
-                   /* division by -1, the only one that can overflow */
-        je      L(negenter)
-        sar     %rcx
-        jnc     L(div)	
-        jz      L(div)   /* division by zero */
-        mov     %rdi, %rax
-        sar     %rax
-        jnc     L(div)
-        cqo
-        idiv    %rcx
-        lea     1(%rax, %rax), %rax
-        OP
-        ret
-L(div):
-        C_JMP(div)
-        EPILOG(div)
-
-
-         /* divexact */
-        PROLOG(divexact)
-        mov     %rsi, %rcx
-        cmp     $-1, %rsi
-                   /* division by -1, the only one that can overflow */
-        je      L(negenter)
-        sar     %rcx
-        jnc     L(divexact)	
-        jz      L(divexact)   /* division by zero */
-        mov     %rdi, %rax
-        sar     %rax
-        jnc     L(divexact)
-        cqo
-        idiv    %rcx
-        lea     1(%rax, %rax), %rax
-        OP
-        ret
-L(divexact):
-        C_JMP(divexact)
-        EPILOG(divexact)
-
-
-        /* rem */
-        PROLOG(rem)
-        mov     %rdi, %rax
-        sar     %rax
-        jnc     L(rem)
-        mov     %rsi, %rcx
-        sar     %rcx
-        jnc     L(rem)
-        jz      L(rem)   /* division by zero */
-        cmp     $-1, %rcx
-        je      L(remneg)
-        cqo
-        idiv    %rcx
-        lea     1(%rdx, %rdx), %rax
-        OP
-        ret
-L(remneg):
-        /* division by -1 */
-        mov     $1, %eax
-        OP
-        ret
-L(rem):
-        C_JMP(rem)
-        EPILOG(rem)
-
-
-        /* bit operations */
-        /* ************** */
-
-
-        /* not */
-        PROLOG(lognot)
-        test    $1, %dil
-        jz      L(lognot)
-        lea     -1(%rdi), %rax
-        not     %rax
-        OP
-        ret
-L(lognot):
-        C_JMP(lognot)
-        EPILOG(lognot)
-
-
-        /* and */
-        PROLOG(logand)
-        mov     %rdi, %rax
-        and     %rsi, %rax
-        test    $1, %al
-        jz      L(logand)
-        OP
-        ret
-L(logand):
-        C_JMP(logand)
-        EPILOG(logand)
-
-
-         /* or */
-        PROLOG(logor)
-        test    $1, %dil
-        jz      L(logor)
-        test    $1, %sil
-        jz      L(logor)
-        mov     %rdi, %rax
-        or      %rsi, %rax
-        OP
-        ret
-L(logor):
-        C_JMP(logor)
-        EPILOG(logor)
-
-
-         /* xor */
-        PROLOG(logxor)
-        test    $1, %dil
-        jz      L(logxor)
-        test    $1, %sil
-        jz      L(logxor)
-        lea     -1(%rdi), %rax
-        xor     %rsi, %rax
-        OP
-        ret
-L(logxor):
-        C_JMP(logxor)
-        EPILOG(logxor)
-
-
-         /* shift_left */
-        PROLOG(shift_left)
-        test    $1, %dil
-        jz      L(shift_left)
-        mov     %esi, %ecx
-        sar     %ecx
-        cmp     $127, %rsi /* 63 unboxed */
-        jae     L(shift_left)
-        lea     -1(%rdi), %rax
-        mov     %rax, %r8
-        sal     %cl, %rax
-        mov     %rax, %rdx
-        sar     %cl, %rdx
-        cmp     %r8, %rdx
-        jne     L(shift_left) /* overflow */
-        inc     %rax
-        OP
-        ret
-L(shift_left):
-        C_JMP(shift_left)
-        EPILOG(shift_left)
-
-
-        /* shift_right */
-        PROLOG(shift_right)
-        test    $1, %dil
-        jz      L(shift_right)
-        mov     %rsi, %rcx
-	mov     $63, %eax
-        sar     %rcx
-        js      L(shift_right)
-        cmp     %rax, %rcx /* compare second argument to 63 */
-        cmovae  %eax, %ecx /* if above or equal, then use 63 */
-        mov     %rdi, %rax
-        sar     %cl, %rax
-        or      $1, %rax
-        OP
-        ret
-L(shift_right):
-        C_JMP(shift_right)
-        EPILOG(shift_right)
-
-
diff --git a/caml_z_x86_64_mingw64.S b/caml_z_x86_64_mingw64.S
deleted file mode 100644
index a25ccf6..0000000
--- a/caml_z_x86_64_mingw64.S
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
-   Assembly version for the fast path of some functions in Z:
-   - x86_64 target
-   - Win64 ABI
-   - GNU as
-
-
-   This file is part of the Zarith library
-   http://forge.ocamlcore.org/projects/zarith .
-   It is distributed under LGPL 2 licensing, with static linking exception.
-   See the LICENSE file included in the distribution.
-
-   Copyright (c) 2010-2011 Antoine Miné, Abstraction project.
-   Abstraction is part of the LIENS (Laboratoire d'Informatique de l'ENS),
-   a joint laboratory by:
-   CNRS (Centre national de la recherche scientifique, France),
-   ENS (École normale supérieure, Paris, France),
-   INRIA Rocquencourt (Institut national de recherche en informatique, France).
-
- */
-
-
-        /* helper functions */
-        /* **************** */
-
-
-#define SYMB(x)  x
-#define FUNCTION_ALIGN 16
-
-#define PROLOG(proc) \
-        .text;   \
-        .globl  SYMB(ml_as_z_##proc); \
-        .align  FUNCTION_ALIGN; \
-SYMB(ml_as_z_##proc):\
-
-#define EPILOG(proc)
-
-#define C_JMP(proc) \
-        jmp     SYMB(ml_z_##proc)
-
-	
-        /* operation counter */
-
-#ifndef Z_PERF_COUNTER
-#define OP
-#else
-#define OP \
-        mov     SYMB(ml_z_ops_as(%rip)), %rcx; \
-        addq    $1, (%rcx)
-#endif
-
-        /* unary arithmetics */
-        /* ***************** */
-
-        /* neg */
-        PROLOG(neg)
-        test    $1, %rcx
-        jz      .Lneg
-        mov     %rcx, %rax
-        not     %rax
-        add     $3, %rax
-        jo      .Lneg
-        OP
-        ret
-.Lneg:
-        C_JMP(neg)
-        EPILOG(neg)
-
-
-        /* abs */
-        PROLOG(abs)
-        test    $1, %rcx
-        jz      .Labs
-        mov     %rcx, %rax
-        test    %rcx, %rcx
-        jns     .Labs2
-        not     %rax
-        add     $3, %rax
-        jo      .Lneg
-.Labs2:
-        OP
-        ret
-.Labs:
-        C_JMP(abs)
-        EPILOG(abs)
-
-
-        /* succ */
-        PROLOG(succ)
-        test    $1, %rcx
-        jz      .Lsucc
-        mov     %rcx, %rax
-        add     $2, %rax
-        jo      .Lsucc
-        OP
-        ret
-.Lsucc:
-        C_JMP(succ)
-        EPILOG(succ)
-
-
-        /* pred */
-        PROLOG(pred)
-        test    $1, %rcx
-        jz      .Lpred
-        mov     %rcx, %rax
-        sub     $2, %rax
-        jo      .Lpred
-        OP
-        ret
-.Lpred:
-        C_JMP(pred)
-        EPILOG(pred)
-
-
-
-
-        /* binary arithmetics */
-        /* ****************** */
-
-
-        /* add */
-        PROLOG(add)
-        test    $1, %rcx
-        jz      .Ladd
-        test    $1, %rdx
-        jz      .Ladd
-        lea     -1(%rcx), %rax
-        add     %rdx, %rax
-        jo      .Ladd
-        OP
-        ret
-.Ladd:
-        C_JMP(add)
-        EPILOG(add)
-
-
-        /* sub */
-        PROLOG(sub)
-        test    $1, %rcx
-        jz      .Lsub
-        test    $1, %rdx
-        jz      .Lsub
-        mov     %rcx, %rax
-        sub     %rdx, %rax
-        jo      .Lsub
-        inc     %rax
-        OP
-        ret
-.Lsub:
-        C_JMP(sub)
-        EPILOG(sub)
-
-
-        /* mul */
-        PROLOG(mul)
-        test    $1, %rcx
-        jz      .Lmul
-        test    $1, %rdx
-        jz      .Lmul
-        lea     -1(%rdx), %rax
-	mov	%rcx, %r8
-        sar     %r8
-        imul    %r8, %rax
-        jo      .Lmul
-        inc     %rax
-        OP
-        ret
-.Lmul:
-        C_JMP(mul)
-        EPILOG(mul)
-
-
-        /* div */
-        PROLOG(div)
-        test    $1, %rcx
-        jz      .Ldiv
-        test    $1, %rdx
-        jz      .Ldiv
-        mov     %rdx, %r8
-        mov     %rcx, %rax
-        sar     %r8
-        jz      .Ldiv   /* division by zero */
-        cmp     $-1, %r8
-        je      .Ldivneg
-        sar     %rax
-        cqo
-        idiv    %r8
-        sal     %rax
-        inc     %rax
-        OP
-        ret
-.Ldivneg:
-        /* division by -1, the only one that can overflow */
-        not     %rax
-        add     $3, %rax
-        jo      .Ldiv
-        OP
-        ret
-.Ldiv:
-        C_JMP(div)
-        EPILOG(div)
-
-
-        /* divexact */
-        PROLOG(divexact)
-        test    $1, %rcx
-        jz      .Ldivexact
-        test    $1, %rdx
-        jz      .Ldivexact
-        mov     %rdx, %r8
-        mov     %rcx, %rax
-        sar     %r8
-        jz      .Ldivexact   /* division by zero */
-        cmp     $-1, %r8
-        je      .Ldivexactneg
-        sar     %rax
-        cqo
-        idiv    %r8
-        sal     %rax
-        inc     %rax
-        OP
-        ret
-.Ldivexactneg:
-        /* division by -1, the only one that can overflow */
-        not     %rax
-        add     $3, %rax
-        jo      .Ldivexact
-        OP
-        ret
-.Ldivexact:
-        C_JMP(divexact)
-        EPILOG(divexact)
-
-
-        /* rem */
-        PROLOG(rem)
-        test    $1, %rcx
-        jz      .Lrem
-        test    $1, %rdx
-        jz      .Lrem
-        mov     %rdx, %r8
-	mov	%rcx, %rax
-        sar     %r8
-        jz      .Lrem   /* division by zero */
-        cmp     $-1, %r8
-        je      .Lremneg
-        sar     %rax
-        cqo
-        idiv    %r8
-        sal     %rdx
-        lea     1(%rdx), %rax
-        OP
-        ret
-.Lremneg:
-        /* division by -1 */
-        mov     $1, %rax
-        OP
-        ret
-.Lrem:
-        C_JMP(rem)
-        EPILOG(rem)
-
-
-        /* bit operations */
-        /* ************** */
-
-
-        /* not */
-        PROLOG(lognot)
-        test    $1, %rcx
-        jz      .Llognot
-        lea     -1(%rcx), %rax
-        not     %rax
-        OP
-        ret
-.Llognot:
-        C_JMP(lognot)
-        EPILOG(lognot)
-
-
-        /* and */
-        PROLOG(logand)
-        mov     %rcx, %rax
-        and     %rdx, %rax
-        test    $1, %rax
-        jz      .Llogand
-        OP
-        ret
-.Llogand:
-        C_JMP(logand)
-        EPILOG(logand)
-
-
-         /* or */
-        PROLOG(logor)
-        test    $1, %rcx
-        jz      .Llogor
-        test    $1, %rdx
-        jz      .Llogor
-        mov     %rcx, %rax
-        or      %rdx, %rax
-        OP
-        ret
-.Llogor:
-        C_JMP(logor)
-        EPILOG(logor)
-
-
-         /* xor */
-        PROLOG(logxor)
-        test    $1, %rcx
-        jz      .Llogxor
-        test    $1, %rdx
-        jz      .Llogxor
-        lea     -1(%rcx), %rax
-        xor     %rdx, %rax
-        OP
-        ret
-.Llogxor:
-        C_JMP(logxor)
-        EPILOG(logxor)
-
-
-         /* shift_left */
-        PROLOG(shift_left)
-        test    $1, %rcx
-        jz      .Lshift_left2
-        lea     -1(%rcx), %rax
-        mov     %rcx, %r9
-        mov     %rdx, %r10
-        sar     %rdx
-        cmp     $63, %rdx
-        jae     .Lshift_left
-        mov     %rdx, %rcx
-        mov     %rax, %r8
-        sal     %cl, %rax
-        mov     %rax, %rdx
-        sar     %cl, %rdx
-        cmp     %r8, %rdx
-        jne     .Lshift_left /* overflow */
-        inc     %rax
-        OP
-        ret
-.Lshift_left:
-        mov     %r9, %rcx
-        mov     %r10, %rdx
-.Lshift_left2:
-       C_JMP(shift_left)
-        EPILOG(shift_left)
-
-
-        /* shift_right */
-        PROLOG(shift_right)
-        test    $1, %rcx
-        jz      .Lshift_right
-        mov     %rcx, %rax
-        mov     %rdx, %rcx
-        sar     %rcx
-        js      .Lshift_right
-        cmp     $63, %rcx
-        jae     .Lshift_right2
-        sar     %cl, %rax
-        or      $1, %rax
-        OP
-        ret
-.Lshift_right2:
-        /* shift by 63 or more */
-        test    %rax, %rax
-        js      .Lshift_right3
-        mov     $1, %rax
-        OP
-        ret
-.Lshift_right3:
-        mov     $-1, %rax
-        OP
-        ret
-.Lshift_right:
-        C_JMP(shift_right)
-        EPILOG(shift_right)
-
diff --git a/z.mlip b/z.mlip
index fa1665d..69b5102 100644
--- a/z.mlip
+++ b/z.mlip
@@ -117,34 +117,34 @@ external of_substring_base
 
 (** {1 Basic arithmetic operations} *)
 
-external succ: t -> t = succ@ASM
+val succ: t -> t
 (** Returns its argument plus one. *)
 
-external pred: t -> t = pred@ASM
+val pred: t -> t
 (** Returns its argument minus one. *)
 
-external abs: t -> t = abs@ASM
+val abs: t -> t
 (** Absolute value. *)
 
-external neg: t -> t = neg@ASM
+val neg: t -> t
 (** Unary negation. *)
 
-external add: t -> t -> t = add@ASM
+val add: t -> t -> t
 (** Addition. *)
 
-external sub: t -> t -> t = sub@ASM
+val sub: t -> t -> t
 (** Subtraction. *)
 
-external mul: t -> t -> t = mul@ASM
+val mul: t -> t -> t
 (** Multiplication. *)
 
-external div: t -> t -> t = div@ASM
+val div: t -> t -> t
 (** Integer division. The result is truncated towards zero
     and obeys the rule of signs.
     Raises [Division_by_zero] if the divisor (second argument) is 0.
  *)
 
-external rem: t -> t -> t = rem@ASM
+val rem: t -> t -> t
 (** Integer remainder. Can raise a [Division_by_zero].
     The result of [rem a b] has the sign of [a], and its absolute value is
     strictly smaller than the absolute value of [b].
@@ -185,7 +185,7 @@ val erem: t -> t -> t
     [a = b * ediv a b + erem a b].  Raises [Division_by_zero] if [b = 0].
  *)
 
-external divexact: t -> t -> t = divexact@ASM
+val divexact: t -> t -> t
 (** [divexact a b] divides [a] by [b], only producing correct result when the
     division is exact, i.e., when [b] evenly divides [a].
     It should be faster than general division.
@@ -214,34 +214,34 @@ external congruent: t -> t -> t -> bool = "ml_z_congruent"
     1s.
  *)
 
-external logand: t -> t -> t = logand@ASM
+val logand: t -> t -> t
 (** Bitwise logical and. *)
-
-external logor: t -> t -> t = logor@ASM
+    
+val logor: t -> t -> t
 (** Bitwise logical or. *)
-
-external logxor: t -> t -> t = logxor@ASM
+    
+val logxor: t -> t -> t
 (** Bitwise logical exclusive or. *)
 
-external lognot: t -> t = lognot@ASM
-(** Bitwise logical negation.
+val lognot: t -> t
+(** Bitwise logical negation. 
     The identity [lognot a]=[-a-1] always hold.
  *)
 
-external shift_left: t -> int -> t = shift_left@ASM
-(** Shifts to the left.
+val shift_left: t -> int -> t
+(** Shifts to the left. 
     Equivalent to a multiplication by a power of 2.
     The second argument must be nonnegative.
  *)
 
-external shift_right: t -> int -> t = shift_right@ASM
-(** Shifts to the right.
-    This is an arithmetic shift,
+val shift_right: t -> int -> t
+(** Shifts to the right. 
+    This is an arithmetic shift, 
     equivalent to a division by a power of 2 with rounding towards -oo.
     The second argument must be nonnegative.
  *)
 
-external shift_right_trunc: t -> int -> t = shift_right_trunc@ASM
+val shift_right_trunc: t -> int -> t
 (** Shifts to the right, rounding towards 0.
     This is equivalent to a division by a power of 2, with truncation.
     The second argument must be nonnegative.
@@ -286,8 +286,8 @@ external hamdist: t -> t -> int = "ml_z_hamdist"
     converted value, an [Overflow] exception is raised.
  *)
 
-external to_int: t -> int = "ml_z_to_int"
-(** Converts to a base integer. May raise [Overflow]. *)
+val to_int: t -> int
+(** Converts to a base integer. May raise an [Overflow]. *)
 
 external to_int32: t -> int32 = "ml_z_to_int32"
 (** Converts to a 32-bit integer. May raise [Overflow]. *)
@@ -632,22 +632,22 @@ external of_bits: string -> t = "ml_z_of_bits"
    [Z.(~$2 + ~$5 * ~$10)].
  *)
 
-external (~-): t -> t = neg@ASM
+val (~-): t -> t
 (** Negation [neg]. *)
 
 val (~+): t -> t
 (** Identity. *)
 
-external (+): t -> t -> t = add@ASM
+val (+): t -> t -> t
 (** Addition [add]. *)
 
-external (-): t -> t -> t = sub@ASM
+val (-): t -> t -> t
 (** Subtraction [sub]. *)
 
-external ( * ): t -> t -> t = mul@ASM
+val ( * ): t -> t -> t
 (** Multiplication [mul]. *)
 
-external (/): t -> t -> t = div@ASM
+val (/): t -> t -> t
 (** Truncated division [div]. *)
 
 external (/>): t -> t -> t = "ml_z_cdiv"
@@ -656,31 +656,32 @@ external (/>): t -> t -> t = "ml_z_cdiv"
 external (/<): t -> t -> t = "ml_z_fdiv"
 (** Flooring division [fdiv]. *)
 
-external (/|): t -> t -> t = divexact@ASM
-(** Exact division [div_exact]. *)
+val (/|): t -> t -> t
+(** Exact division [divexact]. *)
 
-external (mod): t -> t -> t = rem@ASM
+val (mod): t -> t -> t
 (** Remainder [rem]. *)
 
-external (land): t -> t -> t = logand@ASM
+val (land): t -> t -> t
 (** Bit-wise logical and [logand]. *)
 
-external (lor): t -> t -> t = logor@ASM
+val (lor): t -> t -> t
 (** Bit-wise logical inclusive or [logor]. *)
 
-external (lxor): t -> t -> t = logxor@ASM
+val (lxor): t -> t -> t
 (** Bit-wise logical exclusive or [logxor]. *)
 
-external (~!): t -> t = lognot@ASM
+val (~!): t -> t
 (** Bit-wise logical negation [lognot]. *)
 
-external (lsl): t -> int -> t = shift_left@ASM
+val (lsl): t -> int -> t
 (** Bit-wise shift to the left [shift_left]. *)
 
-external (asr): t -> int -> t = shift_right@ASM
+val (asr): t -> int -> t
 (** Bit-wise shift to the right [shift_right]. *)
 
 external (~$): int -> t = "ml_z_of_int" @NOALLOC
+
 (** Conversion from [int] [of_int]. *)
 
 external ( ** ): t -> int -> t = "ml_z_pow"
diff --git a/z.mlp b/z.mlp
index 1e979d4..8660f57 100644
--- a/z.mlp
+++ b/z.mlp
@@ -25,31 +25,193 @@ let _ = init ()
 
 let _ = Callback.register_exception "ml_z_overflow" Overflow
 
-external neg: t -> t = neg@ASM
-external add: t -> t -> t = add@ASM
-external sub: t -> t -> t = sub@ASM
-external mul: t -> t -> t = mul@ASM
-external div: t -> t -> t = div@ASM
+external is_small_int: t -> bool = "%obj_is_int"
+external unsafe_to_int: t -> int = "%identity"
+external unsafe_of_int: int -> t = "%identity"
+
+external c_neg: t -> t = "ml_z_neg"
+
+let neg x =
+  if is_small_int x && unsafe_to_int x <> min_int
+  then unsafe_of_int (- unsafe_to_int x)
+  else c_neg x
+
+external c_add: t -> t -> t = "ml_z_add"
+
+let add x y =
+  if is_small_int x && is_small_int y then begin
+    let z = unsafe_to_int x + unsafe_to_int y in
+    (* Overflow check -- Hacker's Delight, section 2.12 *)
+    if (z lxor unsafe_to_int x) land (z lxor unsafe_to_int y) >= 0 
+    then unsafe_of_int z
+    else c_add x y
+  end else
+    c_add x y
+
+external c_sub: t -> t -> t = "ml_z_sub"
+
+let sub x y =
+  if is_small_int x && is_small_int y then begin
+    let z = unsafe_to_int x - unsafe_to_int y in
+    (* Overflow check -- Hacker's Delight, section 2.12 *)
+    if (unsafe_to_int x lxor unsafe_to_int y)
+       land (z lxor unsafe_to_int x) >= 0 
+    then unsafe_of_int z
+    else c_sub x y
+  end else
+    c_sub x y
+
+external mul_overflows: int -> int -> bool = "ml_z_mul_overflows" @NOALLOC
+external c_mul: t -> t -> t = "ml_z_mul"
+
+let mul x y =
+  if is_small_int x && is_small_int y
+  && not (mul_overflows (unsafe_to_int x) (unsafe_to_int y))
+  then unsafe_of_int (unsafe_to_int x * unsafe_to_int y)
+  else c_mul x y
+
+external c_div: t -> t -> t = "ml_z_div"
+
+let div x y =
+  if is_small_int y then
+    if unsafe_to_int y = -1 then
+      neg x
+    else if is_small_int x then
+      unsafe_of_int (unsafe_to_int x / unsafe_to_int y)
+    else
+      c_div x y
+  else
+    c_div x y
+
 external cdiv: t -> t -> t = "ml_z_cdiv"
 external fdiv: t -> t -> t = "ml_z_fdiv"
-external rem: t -> t -> t = rem@ASM
+
+external c_rem: t -> t -> t = "ml_z_rem"
+
+let rem x y =
+  if is_small_int y then
+    if unsafe_to_int y = -1 then
+      unsafe_of_int 0
+    else if is_small_int x then
+      unsafe_of_int (unsafe_to_int x mod unsafe_to_int y)
+    else
+      c_rem x y
+  else
+    c_rem x y
+
 external div_rem: t -> t -> (t * t) = "ml_z_div_rem"
-external succ: t -> t = succ@ASM
-external pred: t -> t = pred@ASM
-external abs: t -> t = abs@ASM
-external logand: t -> t -> t = logand@ASM
-external logor: t -> t -> t = logor@ASM
-external logxor: t -> t -> t = logxor@ASM
-external lognot: t -> t = lognot@ASM
-external shift_left: t -> int -> t = shift_left@ASM
-external shift_right: t -> int -> t = shift_right@ASM
-external shift_right_trunc: t -> int -> t = shift_right_trunc@ASM
+
+external c_divexact: t -> t -> t = "ml_z_divexact"
+
+let divexact x y =
+  if is_small_int y then
+    if unsafe_to_int y = -1 then
+      neg x
+    else if is_small_int x then
+      unsafe_of_int (unsafe_to_int x / unsafe_to_int y)
+    else
+      c_divexact x y
+  else
+    c_divexact x y
+
+external c_succ: t -> t = "ml_z_succ"
+
+let succ x =
+  if is_small_int x && unsafe_to_int x <> max_int
+  then unsafe_of_int (unsafe_to_int x + 1)
+  else c_succ x
+
+external c_pred: t -> t = "ml_z_pred"
+
+let pred x =
+  if is_small_int x && unsafe_to_int x <> min_int
+  then unsafe_of_int (unsafe_to_int x - 1)
+  else c_pred x
+
+external c_abs: t -> t = "ml_z_abs"
+
+let abs x =
+  if is_small_int x then
+    if unsafe_to_int x >= 0 then x
+    else if unsafe_to_int x <> min_int then
+      unsafe_of_int (- unsafe_to_int x)
+    else
+      c_abs x
+  else
+    c_abs x
+
+external c_logand: t -> t -> t = "ml_z_logand"
+
+let logand x y =
+  if is_small_int x && is_small_int y
+  then unsafe_of_int (unsafe_to_int x land unsafe_to_int y)
+  else c_logand x y
+
+external c_logor: t -> t -> t = "ml_z_logor"
+
+let logor x y =
+  if is_small_int x && is_small_int y
+  then unsafe_of_int (unsafe_to_int x lor unsafe_to_int y)
+  else c_logor x y
+
+external c_logxor: t -> t -> t = "ml_z_logxor"
+
+let logxor x y =
+  if is_small_int x && is_small_int y
+  then unsafe_of_int (unsafe_to_int x lxor unsafe_to_int y)
+  else c_logxor x y
+
+external c_lognot: t -> t = "ml_z_lognot"
+
+let lognot x =
+  if is_small_int x
+  then unsafe_of_int (unsafe_to_int x lxor (-1))
+  else c_lognot x
+
+external c_shift_left: t -> int -> t = "ml_z_shift_left"
+
+let shift_left x y =
+  if is_small_int x && y >= 0 && y < Sys.word_size then begin
+    let z = unsafe_to_int x lsl y in
+    if z asr y = unsafe_to_int x
+    then unsafe_of_int z
+    else c_shift_left x y
+  end else
+    c_shift_left x y
+
+external c_shift_right: t -> int -> t = "ml_z_shift_right"
+
+let shift_right x y =
+  if is_small_int x && y >= 0 then
+    unsafe_of_int
+      (unsafe_to_int x asr (if y < Sys.word_size then y else Sys.word_size - 1))
+  else
+    c_shift_right x y
+
+external c_shift_right_trunc: t -> int -> t = "ml_z_shift_right_trunc"
+
+let shift_right_trunc x y =
+  if is_small_int x && y >= 0 then
+    if y >= Sys.word_size then
+      unsafe_of_int 0
+    else if unsafe_to_int x >= 0 then
+      unsafe_of_int (unsafe_to_int x lsr y)
+    else
+      unsafe_of_int (- ((- unsafe_to_int x) lsr y))
+  else
+    c_shift_right_trunc x y
+
 external of_int: int -> t = "ml_z_of_int" @NOALLOC
 external of_int32: int32 -> t = "ml_z_of_int32"
 external of_int64: int64 -> t = "ml_z_of_int64"
 external of_nativeint: nativeint -> t = "ml_z_of_nativeint"
 external of_float: float -> t = "ml_z_of_float"
-external to_int: t -> int = "ml_z_to_int"
+
+external c_to_int: t -> int = "ml_z_to_int"
+
+let to_int x =
+  if is_small_int x then unsafe_to_int x else c_to_int x
+
 external to_int32: t -> int32 = "ml_z_to_int32"
 external to_int64: t -> int64 = "ml_z_to_int64"
 external to_nativeint: t -> nativeint = "ml_z_to_nativeint"
@@ -75,7 +237,6 @@ external extract: t -> int -> int -> t = "ml_z_extract"
 external powm: t -> t -> t -> t = "ml_z_powm"
 external pow: t -> int -> t = "ml_z_pow"
 external powm_sec: t -> t -> t -> t = "ml_z_powm_sec"
-external divexact: t -> t -> t = divexact@ASM
 external root: t -> int -> t = "ml_z_root"
 external rootrem: t -> int -> t * t = "ml_z_rootrem"
 external invert: t -> t -> t = "ml_z_invert"
@@ -158,7 +319,7 @@ external testbit_internal: t -> int -> bool = "ml_z_testbit" @NOALLOC
 let testbit x n =
   if n >= 0 then testbit_internal x n else invalid_arg "Z.testbit"
 (* The test [n >= 0] is done in Caml rather than in the C stub code
-   so that the latter raises no exceptions and can be declared "noalloc". *)
+   so that the latter raises no exceptions and can be declared @NOALLOC. *)
 
 let is_odd x = testbit_internal x 0
 let is_even x  = not (testbit_internal x 0)
@@ -219,22 +380,22 @@ let sprint () x = to_string x
 let bprint b x = Buffer.add_string b (to_string x)
 let pp_print f x = Format.pp_print_string f (to_string x)
 
-external (~-): t -> t = neg@ASM
+let (~-) = neg
 let (~+) x = x
-external (+): t -> t -> t = add@ASM
-external (-): t -> t -> t = sub@ASM
-external ( * ): t -> t -> t = mul@ASM
-external (/): t -> t -> t = div@ASM
+let (+) = add
+let (-) = sub
+let ( * ) = mul
+let (/) = div
 external (/>): t -> t -> t = "ml_z_cdiv"
 external (/<): t -> t -> t = "ml_z_fdiv"
-external (/|): t -> t -> t = divexact@ASM
-external (mod): t -> t -> t = rem@ASM
-external (land): t -> t -> t = logand@ASM
-external (lor): t -> t -> t = logor@ASM
-external (lxor): t -> t -> t = logxor@ASM
-external (~!): t -> t = lognot@ASM
-external (lsl): t -> int -> t = shift_left@ASM
-external (asr): t -> int -> t = shift_right@ASM
+let (/|) = divexact
+let (mod) = rem
+let (land) = logand
+let (lor) = logor
+let (lxor) = logxor
+let (~!) = lognot
+let (lsl) = shift_left
+let (asr) = shift_right
 external (~$): int -> t = "ml_z_of_int" @NOALLOC
 external ( ** ): t -> int -> t = "ml_z_pow"
 

From 2933e9afcbefce28481a8dc54e2623d8fac71270 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 4 Jan 2021 12:53:20 +0100
Subject: [PATCH 3/4] Define of_int and ~$ as the %identity primitive

This has been problematic with old versions of OCaml, but this should
be fine since 4.04 (see OCaml PRs #555 and #600) and is certainly fine
since 4.11 (see PR #9316).
---
 z.mlip |  4 ++--
 z.mlp  | 45 ++++++++++++++++++++++-----------------------
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/z.mlip b/z.mlip
index 69b5102..fe6dbca 100644
--- a/z.mlip
+++ b/z.mlip
@@ -65,7 +65,7 @@ val one: t
 val minus_one: t
 (** The number -1. *)
 
-external of_int: int -> t = "ml_z_of_int" @NOALLOC
+external of_int: int -> t = "%identity"
 (** Converts from a base integer. *)
 
 external of_int32: int32 -> t = "ml_z_of_int32"
@@ -680,7 +680,7 @@ val (lsl): t -> int -> t
 val (asr): t -> int -> t
 (** Bit-wise shift to the right [shift_right]. *)
 
-external (~$): int -> t = "ml_z_of_int" @NOALLOC
+external (~$): int -> t = "%identity"
 
 (** Conversion from [int] [of_int]. *)
 
diff --git a/z.mlp b/z.mlp
index 8660f57..753299d 100644
--- a/z.mlp
+++ b/z.mlp
@@ -27,13 +27,13 @@ let _ = Callback.register_exception "ml_z_overflow" Overflow
 
 external is_small_int: t -> bool = "%obj_is_int"
 external unsafe_to_int: t -> int = "%identity"
-external unsafe_of_int: int -> t = "%identity"
+external of_int: int -> t = "%identity"
 
 external c_neg: t -> t = "ml_z_neg"
 
 let neg x =
   if is_small_int x && unsafe_to_int x <> min_int
-  then unsafe_of_int (- unsafe_to_int x)
+  then of_int (- unsafe_to_int x)
   else c_neg x
 
 external c_add: t -> t -> t = "ml_z_add"
@@ -43,7 +43,7 @@ let add x y =
     let z = unsafe_to_int x + unsafe_to_int y in
     (* Overflow check -- Hacker's Delight, section 2.12 *)
     if (z lxor unsafe_to_int x) land (z lxor unsafe_to_int y) >= 0 
-    then unsafe_of_int z
+    then of_int z
     else c_add x y
   end else
     c_add x y
@@ -56,7 +56,7 @@ let sub x y =
     (* Overflow check -- Hacker's Delight, section 2.12 *)
     if (unsafe_to_int x lxor unsafe_to_int y)
        land (z lxor unsafe_to_int x) >= 0 
-    then unsafe_of_int z
+    then of_int z
     else c_sub x y
   end else
     c_sub x y
@@ -67,7 +67,7 @@ external c_mul: t -> t -> t = "ml_z_mul"
 let mul x y =
   if is_small_int x && is_small_int y
   && not (mul_overflows (unsafe_to_int x) (unsafe_to_int y))
-  then unsafe_of_int (unsafe_to_int x * unsafe_to_int y)
+  then of_int (unsafe_to_int x * unsafe_to_int y)
   else c_mul x y
 
 external c_div: t -> t -> t = "ml_z_div"
@@ -77,7 +77,7 @@ let div x y =
     if unsafe_to_int y = -1 then
       neg x
     else if is_small_int x then
-      unsafe_of_int (unsafe_to_int x / unsafe_to_int y)
+      of_int (unsafe_to_int x / unsafe_to_int y)
     else
       c_div x y
   else
@@ -91,9 +91,9 @@ external c_rem: t -> t -> t = "ml_z_rem"
 let rem x y =
   if is_small_int y then
     if unsafe_to_int y = -1 then
-      unsafe_of_int 0
+      of_int 0
     else if is_small_int x then
-      unsafe_of_int (unsafe_to_int x mod unsafe_to_int y)
+      of_int (unsafe_to_int x mod unsafe_to_int y)
     else
       c_rem x y
   else
@@ -108,7 +108,7 @@ let divexact x y =
     if unsafe_to_int y = -1 then
       neg x
     else if is_small_int x then
-      unsafe_of_int (unsafe_to_int x / unsafe_to_int y)
+      of_int (unsafe_to_int x / unsafe_to_int y)
     else
       c_divexact x y
   else
@@ -118,14 +118,14 @@ external c_succ: t -> t = "ml_z_succ"
 
 let succ x =
   if is_small_int x && unsafe_to_int x <> max_int
-  then unsafe_of_int (unsafe_to_int x + 1)
+  then of_int (unsafe_to_int x + 1)
   else c_succ x
 
 external c_pred: t -> t = "ml_z_pred"
 
 let pred x =
   if is_small_int x && unsafe_to_int x <> min_int
-  then unsafe_of_int (unsafe_to_int x - 1)
+  then of_int (unsafe_to_int x - 1)
   else c_pred x
 
 external c_abs: t -> t = "ml_z_abs"
@@ -134,7 +134,7 @@ let abs x =
   if is_small_int x then
     if unsafe_to_int x >= 0 then x
     else if unsafe_to_int x <> min_int then
-      unsafe_of_int (- unsafe_to_int x)
+      of_int (- unsafe_to_int x)
     else
       c_abs x
   else
@@ -144,28 +144,28 @@ external c_logand: t -> t -> t = "ml_z_logand"
 
 let logand x y =
   if is_small_int x && is_small_int y
-  then unsafe_of_int (unsafe_to_int x land unsafe_to_int y)
+  then of_int (unsafe_to_int x land unsafe_to_int y)
   else c_logand x y
 
 external c_logor: t -> t -> t = "ml_z_logor"
 
 let logor x y =
   if is_small_int x && is_small_int y
-  then unsafe_of_int (unsafe_to_int x lor unsafe_to_int y)
+  then of_int (unsafe_to_int x lor unsafe_to_int y)
   else c_logor x y
 
 external c_logxor: t -> t -> t = "ml_z_logxor"
 
 let logxor x y =
   if is_small_int x && is_small_int y
-  then unsafe_of_int (unsafe_to_int x lxor unsafe_to_int y)
+  then of_int (unsafe_to_int x lxor unsafe_to_int y)
   else c_logxor x y
 
 external c_lognot: t -> t = "ml_z_lognot"
 
 let lognot x =
   if is_small_int x
-  then unsafe_of_int (unsafe_to_int x lxor (-1))
+  then of_int (unsafe_to_int x lxor (-1))
   else c_lognot x
 
 external c_shift_left: t -> int -> t = "ml_z_shift_left"
@@ -174,7 +174,7 @@ let shift_left x y =
   if is_small_int x && y >= 0 && y < Sys.word_size then begin
     let z = unsafe_to_int x lsl y in
     if z asr y = unsafe_to_int x
-    then unsafe_of_int z
+    then of_int z
     else c_shift_left x y
   end else
     c_shift_left x y
@@ -183,7 +183,7 @@ external c_shift_right: t -> int -> t = "ml_z_shift_right"
 
 let shift_right x y =
   if is_small_int x && y >= 0 then
-    unsafe_of_int
+    of_int
       (unsafe_to_int x asr (if y < Sys.word_size then y else Sys.word_size - 1))
   else
     c_shift_right x y
@@ -193,15 +193,14 @@ external c_shift_right_trunc: t -> int -> t = "ml_z_shift_right_trunc"
 let shift_right_trunc x y =
   if is_small_int x && y >= 0 then
     if y >= Sys.word_size then
-      unsafe_of_int 0
+      of_int 0
     else if unsafe_to_int x >= 0 then
-      unsafe_of_int (unsafe_to_int x lsr y)
+      of_int (unsafe_to_int x lsr y)
     else
-      unsafe_of_int (- ((- unsafe_to_int x) lsr y))
+      of_int (- ((- unsafe_to_int x) lsr y))
   else
     c_shift_right_trunc x y
 
-external of_int: int -> t = "ml_z_of_int" @NOALLOC
 external of_int32: int32 -> t = "ml_z_of_int32"
 external of_int64: int64 -> t = "ml_z_of_int64"
 external of_nativeint: nativeint -> t = "ml_z_of_nativeint"
@@ -396,7 +395,7 @@ let (lxor) = logxor
 let (~!) = lognot
 let (lsl) = shift_left
 let (asr) = shift_right
-external (~$): int -> t = "ml_z_of_int" @NOALLOC
+external (~$): int -> t = "%identity"
 external ( ** ): t -> int -> t = "ml_z_pow"
 
 module Compare = struct

From d8b77d53ab6a25f5d27ebfec65309ad0b087b597 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 4 Jan 2021 12:53:20 +0100
Subject: [PATCH 4/4] Update .opam file

Minimum OCaml version is 4.04.0
(to avoid bug with %identity used for Z.of_int).

ocamlfind is a use-time dependency, not just a build-time dependency.
---
 zarith.opam | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zarith.opam b/zarith.opam
index 8eedde4..8a4e3d2 100644
--- a/zarith.opam
+++ b/zarith.opam
@@ -27,8 +27,8 @@ install: [
   [make "install"]
 ]
 depends: [
-  "ocaml"
-  "ocamlfind" {build}
+  "ocaml"     {>= "4.04.0"}
+  "ocamlfind"
   "conf-gmp"
   "conf-perl" {build}
 ]