diff --git a/kem/ntruprime/doc.go b/kem/ntruprime/doc.go
new file mode 100644
index 000000000..b6f770b32
--- /dev/null
+++ b/kem/ntruprime/doc.go
@@ -0,0 +1,10 @@
+//go:generate go run gen.go
+// Package ntruprime implements the NTRU Prime IND-CCA2 secure
+// key encapsulation mechanism (KEM) as submitted to round 3 of the NIST PQC
+// competition and described in
+//
+//	https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+//
+// The code is translated from the C reference implementation.
+package ntruprime
diff --git a/kem/ntruprime/gen.go b/kem/ntruprime/gen.go
new file mode 100644
index 000000000..24f863af4
--- /dev/null
+++ b/kem/ntruprime/gen.go
@@ -0,0 +1,139 @@
+//go:build ignore
+// +build ignore
+package main
+import (
+	"bytes"
+	"go/format"
+	"io/ioutil"
+	"strings"
+	"text/template"
+// Instance describes one parameter set for which a package is generated. It is
+// the value passed to the templates when they are executed.
+type Instance struct {
+	// Name is the parameter-set name, e.g. "SNTRUP761"; Pkg lower-cases it.
+	Name string
+	// Hash is left empty by every instance literal in this file.
+	Hash string
+// Pkg returns the lower-cased Name of the instance. The generator uses it as
+// the output directory for the instance's generated files.
+func (inst Instance) Pkg() string {
+	pkgName := strings.ToLower(inst.Name)
+	return pkgName
+var (
+	// SInstances lists the instances rendered from templates/sntrup.templ.go
+	// (and from templates/kat.templ.go by generateKAT).
+	SInstances = []Instance{
+		{Name: "SNTRUP761"},
+		{Name: "SNTRUP653"},
+		{Name: "SNTRUP857"},
+		{Name: "SNTRUP953"},
+		{Name: "SNTRUP1013"},
+		{Name: "SNTRUP1277"},
+	}
+	// LPRInstances lists the instances rendered from
+	// templates/ntrulpr.templ.go.
+	LPRInstances = []Instance{
+		{Name: "NTRULPR761"},
+		{Name: "NTRULPR653"},
+		{Name: "NTRULPR857"},
+		{Name: "NTRULPR953"},
+		{Name: "NTRULPR1013"},
+		{Name: "NTRULPR1277"},
+	}
+	// TemplateWarning is the marker searched for in the formatted output;
+	// everything before its first occurrence is dropped from the written file.
+	TemplateWarning = "// Code generated from"
+// main regenerates the per-instance ntruprime.go files for both instance
+// lists.
+//
+// NOTE(review): generateKAT is defined in this file but is not called here.
+func main() {
+	generateStreamlinedPackageFiles()
+	generateLPRPackageFiles()
+func generateStreamlinedPackageFiles() {
+	template, err := template.ParseFiles("templates/sntrup.templ.go")
+	if err != nil {
+		panic(err)
+	}
+	for _, mode := range SInstances {
+		buf := new(bytes.Buffer)
+		err := template.Execute(buf, mode)
+		if err != nil {
+			panic(err)
+		}
+		// Formating output code
+		code, err := format.Source(buf.Bytes())
+		if err != nil {
+			panic("error formating code")
+		}
+		res := string(code)
+		offset := strings.Index(res, TemplateWarning)
+		if offset == -1 {
+			panic("Missing template warning in pkg.templ.go")
+		}
+		err = ioutil.WriteFile(mode.Pkg()+"/ntruprime.go", []byte(res[offset:]), 0o644)
+		if err != nil {
+			panic(err)
+		}
+	}
+func generateLPRPackageFiles() {
+	template, err := template.ParseFiles("templates/ntrulpr.templ.go")
+	if err != nil {
+		panic(err)
+	}
+	for _, mode := range LPRInstances {
+		buf := new(bytes.Buffer)
+		err := template.Execute(buf, mode)
+		if err != nil {
+			panic(err)
+		}
+		// Formating output code
+		code, err := format.Source(buf.Bytes())
+		if err != nil {
+			panic("error formating code")
+		}
+		res := string(code)
+		offset := strings.Index(res, TemplateWarning)
+		if offset == -1 {
+			panic("Missing template warning in pkg.templ.go")
+		}
+		err = ioutil.WriteFile(mode.Pkg()+"/ntruprime.go", []byte(res[offset:]), 0o644)
+		if err != nil {
+			panic(err)
+		}
+	}
+func generateKAT() {
+	template, err := template.ParseFiles("templates/kat.templ.go")
+	if err != nil {
+		panic(err)
+	}
+	for _, mode := range SInstances {
+		buf := new(bytes.Buffer)
+		err := template.Execute(buf, mode)
+		if err != nil {
+			panic(err)
+		}
+		// Formating output code
+		code, err := format.Source(buf.Bytes())
+		if err != nil {
+			panic("error formating code")
+		}
+		res := string(code)
+		offset := strings.Index(res, TemplateWarning)
+		if offset == -1 {
+			panic("Missing template warning in pkg.templ.go")
+		}
+		err = ioutil.WriteFile(mode.Pkg()+"/kat_test.go", []byte(res[offset:]), 0o600)
+		if err != nil {
+			panic(err)
+		}
+	}
diff --git a/kem/ntruprime/internal/Decode.go b/kem/ntruprime/internal/Decode.go
new file mode 100644
index 000000000..770112526
--- /dev/null
+++ b/kem/ntruprime/internal/Decode.go
@@ -0,0 +1,67 @@
+package internal
+// TO DO: Optimize the Decode function
+/* Decode(R,s,M,len) */
+/* assumes 0 < M[i] < 16384 */
+/* produces 0 <= R[i] < M[i] */
+//
+// Decode recovers len integers from the byte string S and stores them in
+// out[0..len-1]; M[i] is the modulus of the i-th integer.
+//
+// For len > 1 adjacent moduli are merged pairwise. Depending on the size of
+// their product, two, one or zero bytes are taken from the front of S as the
+// low part of the merged value. The half-length list is decoded recursively,
+// and each merged value is then split back into its two components.
+func Decode(out []uint16, S []uint8, M []uint16, len int) {
+	index := 0
+	// Base case: one value, stored in zero, one or two bytes depending on M[0].
+	if len == 1 {
+		if M[0] == 1 {
+			out[index] = 0
+		} else if M[0] <= 256 {
+			out[index] = Uint32ModUint14(uint32(S[0]), M[0])
+		} else {
+			out[index] = Uint32ModUint14(uint32(uint16(S[0])+((uint16(S[1]))<<8)), M[0])
+		}
+	}
+	if len > 1 {
+		R2 := make([]uint16, (len+1)/2)
+		M2 := make([]uint16, (len+1)/2)
+		// bottomr holds the low part read from S for each pair, bottomt its
+		// radix (1, 256 or 65536).
+		bottomr := make([]uint16, len/2)
+		bottomt := make([]uint32, len/2)
+		i := 0
+		for i = 0; i < len-1; i += 2 {
+			m := uint32(M[i]) * uint32(M[i+1])
+			if m > 256*16383 {
+				// Two bytes are consumed; the merged modulus shrinks by 2^16,
+				// rounded up one byte at a time.
+				bottomt[i/2] = 256 * 256
+				bottomr[i/2] = uint16(S[0]) + 256*uint16(S[1])
+				S = S[2:]
+				M2[i/2] = uint16((((m + 255) >> 8) + 255) >> 8)
+			} else if m >= 16384 {
+				// One byte is consumed.
+				bottomt[i/2] = 256
+				bottomr[i/2] = uint16(S[0])
+				S = S[1:]
+				M2[i/2] = uint16((m + 255) >> 8)
+			} else {
+				// The product already fits: no bytes are consumed.
+				bottomt[i/2] = 1
+				bottomr[i/2] = 0
+				M2[i/2] = uint16(m)
+			}
+		}
+		// An unpaired last modulus is carried over unchanged.
+		if i < len {
+			M2[i/2] = M[i]
+		}
+		Decode(R2, S, M2, (len+1)/2)
+		for i = 0; i < len-1; i += 2 {
+			r := uint32(bottomr[i/2])
+			var r1 uint32
+			var r0 uint16
+			// Rebuild the merged value, then split it: r0 = r mod M[i] and
+			// r1 = (r / M[i]) mod M[i+1].
+			r += bottomt[i/2] * uint32(R2[i/2])
+			Uint32DivmodUint14(&r1, &r0, r, M[i])
+			r1 = uint32(Uint32ModUint14(r1, M[i+1])) /* only needed for invalid inputs */
+			out[index] = r0
+			index++
+			out[index] = uint16(r1)
+			index++
+		}
+		if i < len {
+			out[index] = R2[i/2]
+		}
+	}
diff --git a/kem/ntruprime/internal/Divmod.go b/kem/ntruprime/internal/Divmod.go
new file mode 100644
index 000000000..8990c599c
--- /dev/null
+++ b/kem/ntruprime/internal/Divmod.go
@@ -0,0 +1,102 @@
+package internal
+// CPU division instruction typically takes time depending on x.
+// This software is designed to take time independent of x.
+// Time still varies depending on m; user must ensure that m is constant.
+// Time also varies on CPUs where multiplication is variable-time.
+// There could be more CPU issues.
+// There could also be compiler issues.
+// q, r = x/m
+// Returns quotient and remainder
+//
+// Uint32DivmodUint14 stores the quotient of x by m in *q and the remainder in
+// *r. The only division is 0x80000000/m, which does not involve x; x is
+// reduced with multiply-and-shift steps. m == 0 panics at that division.
+// The name suggests m is expected to fit in 14 bits — TODO confirm.
+func Uint32DivmodUint14(q *uint32, r *uint16, x uint32, m uint16) {
+	// v is the scaled reciprocal floor(2^31 / m).
+	var v uint32 = 0x80000000
+	v /= uint32(m)
+	*q = 0
+	// Two rounds of quotient estimation; each subtracts the estimated multiple
+	// of m from x and accumulates the estimate into *q.
+	qpart := uint32(uint64(x) * uint64(v) >> 31)
+	x -= qpart * uint32(m)
+	*q += qpart
+	qpart = uint32(uint64(x) * uint64(v) >> 31)
+	x -= qpart * uint32(m)
+	*q += qpart
+	// Subtract m once more. If x went negative (top bit set), mask is
+	// all-ones and the subtraction is undone: m is added back and *q drops by
+	// one.
+	x -= uint32(m)
+	*q += 1
+	mask := -(x >> 31)
+	x += mask & uint32(m)
+	*q += mask
+	*r = uint16(x)
+// Uint32DivUint14 returns the quotient of x divided by m, as computed by
+// Uint32DivmodUint14; the remainder is discarded.
+func Uint32DivUint14(x uint32, m uint16) uint32 {
+	var (
+		quotient  uint32
+		remainder uint16
+	)
+	Uint32DivmodUint14(&quotient, &remainder, x, m)
+	return quotient
+// Uint32ModUint14 returns the remainder of x divided by m, as computed by
+// Uint32DivmodUint14; the quotient is discarded.
+func Uint32ModUint14(x uint32, m uint16) uint16 {
+	var (
+		quotient  uint32
+		remainder uint16
+	)
+	Uint32DivmodUint14(&quotient, &remainder, x, m)
+	return remainder
+// Calculates quotient and remainder
+//
+// Int32DivmodUint14 is the signed counterpart of Uint32DivmodUint14: it stores
+// the quotient of x by m in *q and the remainder in *r. x is shifted into the
+// unsigned range by adding 2^31, divided, and the division of 2^31 itself is
+// then subtracted from both results.
+func Int32DivmodUint14(q *int32, r *uint16, x int32, m uint16) {
+	var uq, uq2 uint32
+	var ur, ur2 uint16
+	var mask uint32
+	Uint32DivmodUint14(&uq, &ur, 0x80000000+uint32(x), m)
+	Uint32DivmodUint14(&uq2, &ur2, 0x80000000, m)
+	ur -= ur2
+	uq -= uq2
+	// If the remainder difference wrapped (top bit of ur set), mask is
+	// all-ones: m is added to the remainder and the quotient drops by one.
+	mask = -(uint32)(ur >> 15)
+	ur += uint16(mask & uint32(m))
+	uq += mask
+	*r = ur
+	*q = int32(uq)
+// Int32DivUint14 returns the quotient of x divided by m, as computed by
+// Int32DivmodUint14; the remainder is discarded.
+func Int32DivUint14(x int32, m uint16) int32 {
+	var (
+		quotient  int32
+		remainder uint16
+	)
+	Int32DivmodUint14(&quotient, &remainder, x, m)
+	return quotient
+// Int32ModUint14 returns the remainder of x divided by m, as computed by
+// Int32DivmodUint14; the quotient is discarded.
+func Int32ModUint14(x int32, m uint16) uint16 {
+	var (
+		quotient  int32
+		remainder uint16
+	)
+	Int32DivmodUint14(&quotient, &remainder, x, m)
+	return remainder
+// Returns -1 if x!=0; else return 0
+//
+// Int16NonzeroMask computes the mask with arithmetic only: no branch depends
+// on x.
+func Int16NonzeroMask(x int16) int {
+	u := uint16(x) /* 0, else 1...65535 */
+	v := uint32(u) /* 0, else 1...65535 */
+	v = -v         /* 0, else 2^32-65535...2^32-1 */
+	v >>= 31       /* 0, else 1 */
+	return -int(v) /* 0, else -1 */
+// Returns -1 if x<0; otherwise return 0
+//
+// Int16NegativeMask isolates the sign bit of x (bit 15) and negates it; no
+// branch depends on x.
+func Int16NegativeMask(x int16) int {
+	u := uint16(x)
+	u >>= 15 // 1 when the sign bit is set, else 0
+	return -(int)(u)
diff --git a/kem/ntruprime/internal/Encode.go b/kem/ntruprime/internal/Encode.go
new file mode 100644
index 000000000..effae5e83
--- /dev/null
+++ b/kem/ntruprime/internal/Encode.go
@@ -0,0 +1,42 @@
+package internal
+/* 0 <= R[i] < M[i] < 16384 */
+//
+// Encode packs the len integers R[0..len-1], with moduli M[0..len-1], into
+// bytes written consecutively to out.
+//
+// Adjacent pairs are merged into r = R[i] + R[i+1]*M[i] with modulus
+// m = M[i]*M[i+1]. While m >= 16384 the low byte of r is emitted and r and m
+// are both scaled down by 256 (m rounded up). The merged list is processed
+// again until one value remains, whose bytes are emitted last.
+//
+// NOTE: R and M are overwritten with the intermediate lists (see the copy
+// calls below), so the caller's slices are clobbered when len > 1.
+func Encode(out []uint8, R []uint16, M []uint16, len int) {
+	if len > 1 {
+		R2 := make([]uint16, (len+1)/2)
+		M2 := make([]uint16, (len+1)/2)
+		var i int
+		// Each pass halves the list length (rounding up).
+		for ; len > 1; len = (len + 1) / 2 {
+			for i = 0; i < len-1; i += 2 {
+				m0 := uint32(M[i])
+				r := uint32(R[i]) + uint32(R[i+1])*m0
+				m := uint32(M[i+1]) * m0
+				for m >= 16384 {
+					out[0] = uint8(r)
+					out = out[1:]
+					r >>= 8
+					m = (m + 255) >> 8
+				}
+				R2[i/2] = uint16(r)
+				M2[i/2] = uint16(m)
+			}
+			// An unpaired last element is carried over unchanged.
+			if i < len {
+				R2[i/2] = R[i]
+				M2[i/2] = M[i]
+			}
+			copy(R, R2)
+			copy(M, M2)
+		}
+	}
+	// One value left: emit its bytes, low byte first, until the modulus is 1.
+	if len == 1 {
+		r := R[0]
+		m := M[0]
+		for m > 1 {
+			out[0] = uint8(r)
+			out = out[1:]
+			r >>= 8
+			m = (m + 255) >> 8
+		}
+	}
diff --git a/kem/ntruprime/kat_test.go b/kem/ntruprime/kat_test.go
new file mode 100644
index 000000000..1cb07c66f
--- /dev/null
+++ b/kem/ntruprime/kat_test.go
@@ -0,0 +1,165 @@
+package ntruprime
+import (
+	"bytes"
+	"crypto/sha512"
+	"fmt"
+	"testing"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem/schemes"
+	sntrupSchemes "github.com/cloudflare/circl/pke/ntruprime/kem/schemes/sntrup"
+// TestPQCgenStreamlinedKATKem runs the known-answer test for each Streamlined
+// NTRU Prime instance as a subtest. want is the expected hex SHA-512 digest of
+// the generated KAT transcript and p is passed through to the helper.
+func TestPQCgenStreamlinedKATKem(t *testing.T) {
+	kats := []struct {
+		name string
+		want string
+		p    int
+	}{
+		// Computed from reference implementation
+		{"sntrup653", "82249a46c1bc538e980a2335764c81f70701e6374eed3e1d0457e18c57ec2cee64280dcc75504c2648eb3e37ab3eee37955c1114d851f755a28cc997aba781c8", 653},
+		{"sntrup761", "1a687f42261c47fe4421b35c5d9faf035433fcb2101458680c66c8d54caafec5fb767ea7725d6681ab100912ef06c38d88862a5d2d86786af2989b7dad33813a", 761},
+		{"sntrup857", "79473d6c709dbbc99528886bf2c1d033c409dab1755299154f33232bc57ba1fbe91322fcb741df5252d575a77aa5ca000d52a44c17f1ab64a299884d0f101519", 857},
+		{"sntrup953", "6fe0cf3b8cb62a3011c1870ec9eb3cd8825c06993a213e01ecd0f21f5dee670838fe1c89dd120086a09e8227496a00e22188c8f947618a35764c5a24726ce16c", 953},
+		{"sntrup1013", "195a38eb843fdda53241f65b641ab925f61fb1cf5b0fffcb5891115da121a85174a796d69c75b86c4e92193453155aef9d27ce53aa268076617be55ee6f5da4f", 1013},
+		{"sntrup1277", "ada8a0cbe6b077dc563874fd372f60779bbee1524f576c2931cf9c804163b9632163610d6e380f889170cdf4d9928de0782368a43413f2b6976897ba0e19a828", 1277},
+	}
+	for _, kat := range kats {
+		// Per-iteration copy so the closure does not share the loop variable.
+		kat := kat
+		t.Run(kat.name, func(t *testing.T) {
+			testPQCgenStreamlinedKATKem(t, kat.name, kat.want, kat.p)
+		})
+	}
+// TestPQCgenLPRKATKem runs the known-answer test for each NTRU LPRime instance
+// as a subtest. want is the expected hex SHA-512 digest of the generated KAT
+// transcript and p is passed through to the helper.
+func TestPQCgenLPRKATKem(t *testing.T) {
+	kats := []struct {
+		name string
+		want string
+		p    int
+	}{
+		// Computed from reference implementation
+		{"ntrulpr653", "30b750e9bcf5a14d0dc10a1a4f0ff4269f7ff7a5b8b835fe7d50d45de3653bbb33c3943fc50759175ba7fef92fd601ac705d7658d3f15a8a7610973ef098e849", 653},
+		{"ntrulpr761", "35f9b8191aef509766019015b7af11dd2afaadf7fca827a9b0a80f7318b7e8325345c64d5b5562ee321465378102850297fbbd70fe78c5bd711e382015189e5a", 761},
+		{"ntrulpr857", "919c675a5b1f642d97b866a284c633f52ad309a1f24a5713fa2f7839a84d07091b2c5a80841ce73a2090cc0ce707d9f262772f730d15905ab238a7be1c1e1e3e", 857},
+		{"ntrulpr953", "2ae003933ca87d873956969977b3d7b5133e42df0868a0cbb77067cf9144ce18b0e4342ba850e2f4d46257aaea23f1e290448e3a34e6774f9594230343de7038", 953},
+		{"ntrulpr1013", "5c054bab923095d3dc4250e5e71923c98b7e3bc778aa4a2a4235b8751106eac2cf0e41dc413d1b6fc7bdc8301a46ca206b19b6301c554cf643d473a55a5940a1", 1013},
+		{"ntrulpr1277", "1ec1702ff090324385fdd98a7f1c1adfe80503593e3531c2c3ed7547df47da38fcdd8dedf142d2b426b3f98015b5c8fe3688b41808c513bdada66a15b7f727ab", 1277},
+	}
+	for _, kat := range kats {
+		// Per-iteration copy so the closure does not share the loop variable.
+		kat := kat
+		t.Run(kat.name, func(t *testing.T) {
+			testPQCgenLPRKATKem(t, kat.name, kat.want, kat.p)
+		})
+	}
+// testPQCgenLPRKATKem builds a 100-entry KAT transcript for the named scheme,
+// hashing it with SHA-512 as it goes, and compares the hex digest with
+// expected.
+//
+// Each entry takes a fresh 48-byte seed from a master DRBG and seeds a second
+// DRBG with it. The key seed is filled in three stages: the first 32 bytes, p
+// four-byte words, then the remainder. The encapsulation seed is drawn from
+// the same DRBG afterwards. Every entry also checks that decapsulation
+// returns the encapsulated shared secret.
+func testPQCgenLPRKATKem(t *testing.T, name, expected string, p int) {
+	scheme := schemes.ByName(name)
+	if scheme == nil {
+		t.Fatalf("scheme %q not found", name)
+	}
+	var seed [48]byte
+	kseed := make([]byte, scheme.SeedSize())
+	eseed := make([]byte, scheme.EncapsulationSeedSize())
+	seedBytes := 32
+	for i := 0; i < 48; i++ {
+		seed[i] = byte(i)
+	}
+	g1 := nist.NewDRBG(&seed)
+	f := sha512.New()
+	fmt.Fprintf(f, "# kem/%s\n\n", name)
+	for i := 0; i < 100; i++ {
+		g1.Fill(seed[:])
+		fmt.Fprintf(f, "count = %d\n", i)
+		fmt.Fprintf(f, "seed = %X\n", seed)
+		g2 := nist.NewDRBG(&seed)
+		g2.Fill(kseed[:seedBytes])
+		// The p words are requested one at a time, not in one Fill call.
+		for j := 0; j < p; j++ {
+			g2.Fill(kseed[seedBytes+j*4 : seedBytes+j*4+4])
+		}
+		g2.Fill(kseed[seedBytes+p*4:])
+		pk, sk := scheme.DeriveKeyPair(kseed)
+		ppk, err := pk.MarshalBinary()
+		if err != nil {
+			t.Fatalf("count %d: marshal public key: %v", i, err)
+		}
+		psk, err := sk.MarshalBinary()
+		if err != nil {
+			t.Fatalf("count %d: marshal private key: %v", i, err)
+		}
+		g2.Fill(eseed)
+		ct, ss1, err := scheme.EncapsulateDeterministically(pk, eseed)
+		if err != nil {
+			t.Fatalf("count %d: encapsulate: %v", i, err)
+		}
+		ss2, err := scheme.Decapsulate(sk, ct)
+		if err != nil {
+			t.Fatalf("count %d: decapsulate: %v", i, err)
+		}
+		if !bytes.Equal(ss1[:], ss2[:]) {
+			t.Fatalf("count %d: shared secrets differ:\n enc %X\n dec %X", i, ss1, ss2)
+		}
+		fmt.Fprintf(f, "pk = %X\n", ppk)
+		fmt.Fprintf(f, "sk = %X\n", psk)
+		fmt.Fprintf(f, "ct = %X\n", ct)
+		fmt.Fprintf(f, "ss = %X\n\n", ss1)
+	}
+	if got := fmt.Sprintf("%x", f.Sum(nil)); got != expected {
+		t.Fatalf("%s: KAT digest mismatch:\n got %s\nwant %s", name, got, expected)
+	}
+// testPQCgenStreamlinedKATKem builds a 100-entry KAT transcript for the named
+// scheme, hashing it with SHA-512 as it goes, and compares the hex digest with
+// expected.
+//
+// Each entry takes a fresh 48-byte seed from a master DRBG and seeds a second
+// DRBG with it. The key pair is derived from that DRBG, and the encapsulation
+// seed is then filled from it as p four-byte words. Every entry also checks
+// that decapsulation returns the encapsulated shared secret.
+func testPQCgenStreamlinedKATKem(t *testing.T, name, expected string, p int) {
+	scheme := sntrupSchemes.ByName(name)
+	if scheme == nil {
+		t.Fatalf("scheme %q not found", name)
+	}
+	var seed [48]byte
+	eseed := make([]byte, scheme.EncapsulationSeedSize())
+	for i := 0; i < 48; i++ {
+		seed[i] = byte(i)
+	}
+	g1 := nist.NewDRBG(&seed)
+	f := sha512.New()
+	fmt.Fprintf(f, "# kem/%s\n\n", name)
+	for i := 0; i < 100; i++ {
+		g1.Fill(seed[:])
+		fmt.Fprintf(f, "count = %d\n", i)
+		fmt.Fprintf(f, "seed = %X\n", seed)
+		g2 := nist.NewDRBG(&seed)
+		pk, sk := scheme.DeriveKeyPairFromGen(&g2)
+		ppk, err := pk.MarshalBinary()
+		if err != nil {
+			t.Fatalf("count %d: marshal public key: %v", i, err)
+		}
+		psk, err := sk.MarshalBinary()
+		if err != nil {
+			t.Fatalf("count %d: marshal private key: %v", i, err)
+		}
+		// The p words are requested one at a time, not in one Fill call.
+		for j := 0; j < p; j++ {
+			g2.Fill(eseed[4*j : 4*j+4])
+		}
+		ct, ss1, err := scheme.EncapsulateDeterministically(pk, eseed)
+		if err != nil {
+			t.Fatalf("count %d: encapsulate: %v", i, err)
+		}
+		ss2, err := scheme.Decapsulate(sk, ct)
+		if err != nil {
+			t.Fatalf("count %d: decapsulate: %v", i, err)
+		}
+		if !bytes.Equal(ss1, ss2) {
+			t.Fatalf("count %d: shared secrets differ:\n enc %X\n dec %X", i, ss1, ss2)
+		}
+		fmt.Fprintf(f, "pk = %X\n", ppk)
+		fmt.Fprintf(f, "sk = %X\n", psk)
+		fmt.Fprintf(f, "ct = %X\n", ct)
+		fmt.Fprintf(f, "ss = %X\n\n", ss1)
+	}
+	if got := fmt.Sprintf("%x", f.Sum(nil)); got != expected {
+		t.Fatalf("%s: KAT digest mismatch:\n got %s\nwant %s", name, got, expected)
+	}
diff --git a/kem/ntruprime/ntrulpr1013/ntruprime.go b/kem/ntruprime/ntrulpr1013/ntruprime.go
new file mode 100644
index 000000000..fc7566330
--- /dev/null
+++ b/kem/ntruprime/ntrulpr1013/ntruprime.go
@@ -0,0 +1,832 @@
+// Code generated from ntrulpr.templ.go. DO NOT EDIT.
+// Package ntrulpr1013 implements the IND-CCA2 secure key encapsulation mechanism
+// ntrulpr1013 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package ntrulpr1013
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/ntrulpr1013"
+// Parameters of this instance, taken from the pke/ntruprime package, together
+// with the byte lengths derived from them.
+const (
+	p            = ntrup.P
+	q            = ntrup.Q
+	q12          = ((q - 1) / 2) // Fq values are kept in the range -q12..q12
+	roundedBytes = ntrup.RoundedBytes
+	w    = ntrup.W
+	tau0 = ntrup.Tau0
+	tau1 = ntrup.Tau1
+	tau2 = ntrup.Tau2
+	tau3 = ntrup.Tau3
+	I = ntrup.I
+	hashBytes = 32
+	smallBytes = ((p + 3) / 4) // four 2-bit coefficients per byte, rounded up
+	inputsBytes      = I / 8 // one bit per input
+	seedBytes        = 32
+	ciphertextsBytes = roundedBytes + topBytes
+	secretKeysBytes  = smallBytes
+	publicKeysBytes  = seedBytes + roundedBytes
+	confirmBytes = 32
+	// NOTE(review): tau does not appear to be referenced by the code in this
+	// file; topEncode/topDecode use 4-bit nibbles directly.
+	tau      = 16
+	topBytes = I / 2 // two 4-bit values per byte
+const (
+	// Size of seed for DeriveKeyPair: seedBytes for the public seed, four
+	// bytes for each of the p coefficients, and inputsBytes stored in the
+	// private key.
+	KeySeedSize = seedBytes + p*4 + inputsBytes
+	// Size of seed for EncapsulateTo.
+	EncapsulationSeedSize = inputsBytes
+	// Size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// Size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// Size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// Size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+type (
+	// small holds a coefficient in {-1, 0, 1}.
+	small int8
+	// Fq holds a coefficient modulo q, represented in -q12..q12.
+	Fq    int16
+// arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in (-1,0,1)
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+// x must not be close to top int16
+//
+// f3Freeze reduces x modulo 3 into {-1, 0, 1}: x is shifted up by one, reduced
+// to a remainder in 0..2, and shifted back down by one.
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+/* ----- arithmetic mod q */
+// GF (q)
+// type Fq int16
+/* always represented as -q12...q12 */
+/* so ZZ_fromFq is a no-op */
+/* x must not be close to top int32 */
+//
+// fqFreeze reduces x modulo q into -q12..q12: x is shifted up by q12, reduced
+// to a remainder in 0..q-1, and shifted back down by q12.
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// top maps C to (tau1*(C+tau0) + 2^14) >> 15, i.e. it scales C+tau0 by
+// tau1/2^15 with rounding. topEncode stores each result in four bits.
+func top(C Fq) int8 {
+	return int8((tau1*(int32)(C+tau0) + 16384) >> 15)
+// right maps a value T produced by top back to an element of Fq, computed as
+// tau3*T - tau2 reduced by fqFreeze.
+func right(T int8) Fq {
+	return fqFreeze(tau3*int32(T) - tau2)
+// Polynomials mod q
+// h = f*g in the ring Rq */
+//
+// rqMultSmall multiplies f (coefficients mod q) by the small polynomial g and
+// stores the p reduced coefficients in h. The schoolbook product has 2p-1
+// coefficients. Each coefficient of degree i >= p is folded into degrees i-p
+// and i-p+1, which is reduction by x^p = x + 1.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	// Low half of the product: degrees 0..p-1.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// High half of the product: degrees p..2p-2.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Fold from the top degree down, so that terms folded to degree >= p are
+	// themselves folded by a later iteration.
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// round writes to out the first p coefficients of a, each with its residue
+// modulo 3 (as returned by f3Freeze, in {-1, 0, 1}) subtracted. Every output
+// coefficient is therefore a multiple of 3.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		coeff := a[i]
+		residue := f3Freeze(int16(coeff))
+		out[i] = coeff - Fq(residue)
+	}
+// Returns (min(x, y), max(x, y)), executes in constant time
+//
+// minmax works in place: afterwards *x holds the smaller and *y the larger of
+// the two inputs. The swap is an XOR with a mask computed arithmetically; no
+// branch depends on the values.
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	// c is now 0 or 1; negating turns it into an all-zero or all-one mask.
+	c = -c
+	c &= xy
+	// XOR with xi^yi swaps the two values; XOR with 0 leaves them as is.
+	*x = xi ^ c
+	*y = yi ^ c
+// Sorts the array of unsigned integers
+//
+// cryptoSortUint32 sorts x[0..n-1] in place using minmax compare-exchanges.
+// Which pairs are compared depends only on n, not on the contents of x.
+//
+// NOTE: the locals top, p and q shadow the package-level function top and the
+// constants p and q inside this function.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// top becomes the smallest power of two with top >= n-top.
+	top := 1
+	for top < n-top {
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 {
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 {
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// Sorting to generate short polynomial
+//
+// shortFromList turns p 32-bit values into a polynomial with coefficients in
+// {-1, 0, 1}. The first w values get bit 0 cleared, so their low two bits are
+// 00 or 10. The rest get low two bits 01. After sorting, each coefficient is
+// (low two bits) - 1, so exactly w of them are -1 or +1 and the rest are 0.
+// The upper 30 bits of the inputs decide where the non-zero ones land.
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	// Held in variables so the conversion to uint32 wraps at run time; the
+	// constant conversion uint32(-2) would not compile.
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function
+// The input byte array, in, is prepended by the byte b
+// and its SHA-512 hash is calculated
+// Only the first 32 bytes of the hash are returned
+// e.g., b = 0 means out = Hash0(in)
+//
+// hashPrefix hashes a buffer of inlen+1 bytes: byte(b) followed by the first
+// inlen bytes of in (zero-padded if in is shorter). At most 32 bytes are
+// copied to out; a shorter out receives a truncated digest.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	x := make([]byte, inlen+1)
+	x[0] = byte(b)
+	copy(x[1:], in)
+	h := sha512.Sum512(x)
+	copy(out, h[:32])
+// Higher level randomness
+//
+// urandom32 assembles a uint32 from the first four bytes of seed in
+// little-endian order. It draws no randomness itself: the result is fully
+// determined by seed. seed must hold at least four bytes, otherwise the
+// indexing panics.
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// Generates a random short polynomial
+//
+// shortRandom fills out with a polynomial produced by shortFromList from p
+// 32-bit words. With a non-nil seed the words are read from seed, four bytes
+// each, so seed must hold at least 4*p bytes. With a nil seed the bytes are
+// drawn from crypto/rand.
+func shortRandom(out []small, seed []byte) {
+	if seed == nil {
+		// urandom32 indexes its argument, so a nil seed cannot be passed on to
+		// it; draw the 4*p bytes from the system CSPRNG instead.
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	L := make([]int32, p)
+	for i := 0; i < p; i++ {
+		L[i] = int32(urandom32(seed[i*4 : i*4+4]))
+	}
+	shortFromList(out, L)
+// NTRU LPRime Core
+// (G,A),a = keyGen(G); leaves G unchanged
+//
+// keyGen derives the short secret a from seed (see shortRandom) and sets
+// A = round(a*G), the product being taken by rqMultSmall.
+func keyGen(A []Fq, a []small, G []Fq, seed []byte) {
+	aG := make([]Fq, p)
+	shortRandom(a, seed)
+	rqMultSmall(aG, G, a)
+	round(A, aG)
+// B,T = encrypt(r,(G,A),b)
+//
+// encrypt sets B = round(b*G). For each of the I input values it sets
+// T[i] = top(bA[i] + r[i]*q12), where bA = b*A. Only the first I coefficients
+// of bA are used.
+func encrypt(B []Fq, T []int8, r []int8, G []Fq, A []Fq, b []small) {
+	bG := make([]Fq, p)
+	bA := make([]Fq, p)
+	rqMultSmall(bG, G, b)
+	round(B, bG)
+	rqMultSmall(bA, A, b)
+	for i := 0; i < I; i++ {
+		T[i] = top(fqFreeze(int32(bA[i]) + int32(r[i])*q12))
+	}
+// r = decrypt((B,T),a)
+//
+// decrypt recovers the I input values. For each i it computes
+// right(T[i]) - aB[i] + 4*w + 1 with aB = a*B, reduces it with fqFreeze, and
+// sets r[i] to 1 when the result is negative and 0 otherwise. The sign test
+// uses Int16NegativeMask, not a comparison.
+func decrypt(r []int8, B []Fq, T []int8, a []small) {
+	aB := make([]Fq, p)
+	rqMultSmall(aB, B, a)
+	for i := 0; i < I; i++ {
+		r[i] = int8(-internal.Int16NegativeMask(int16(fqFreeze(int32(right(T[i])) - int32(aB[i]) + 4*w + 1))))
+	}
+// Encoding I-bit inputs
+//
+// Inputs holds I values, one per array element; inputsRandom and decrypt fill
+// it with 0s and 1s.
+type Inputs [I]int8
+// inputsEncode packs r into s, eight values per byte with value i at bit i&7
+// of byte i>>3. The bits are OR-ed into s, so s is expected to be zeroed on
+// entry.
+func inputsEncode(s []byte, r Inputs) {
+	for i := 0; i < I; i++ {
+		s[i>>3] |= byte(r[i] << (i & 7))
+	}
+// Expand
+//
+// expand fills the first p entries of L with 32-bit words taken from an
+// AES-256-CTR keystream. The key is k[:32], the IV is all zero, and each word
+// is read little-endian. L is also the scratch space: it first receives
+// len(L) keystream bytes, one per entry, so it needs at least 4*p entries.
+// Entries at index p and above are left holding single keystream bytes.
+func expand(L []uint32, k []byte) {
+	temp := make([]byte, len(L)) // plaintext to be encrypted. Should be of the same size as L (4*P)
+	// The first aes.BlockSize bytes of ciphertext stay zero and serve as the IV.
+	ciphertext := make([]byte, aes.BlockSize+len(temp))
+	block, err := aes.NewCipher(k[:32])
+	if err != nil {
+		panic(err)
+	}
+	stream := cipher.NewCTR(block, ciphertext[:aes.BlockSize])
+	// Encrypting all-zero plaintext yields the raw keystream.
+	stream.XORKeyStream(ciphertext[aes.BlockSize:], temp)
+	ciphertext = ciphertext[aes.BlockSize:]
+	// convert byte to uint32
+	for i := 0; i < len(temp); i++ {
+		L[i] = uint32(ciphertext[i])
+	}
+	// Pack in place. The four source entries 4i..4i+3 are read into locals
+	// before L[i] is written, and for i >= 1 they lie ahead of index i.
+	for i := 0; i < p; i++ {
+		var L0 uint32 = L[4*i]
+		var L1 uint32 = L[4*i+1]
+		var L2 uint32 = L[4*i+2]
+		var L3 uint32 = L[4*i+3]
+		L[i] = L0 + (L1 << 8) + (L2 << 16) + (L3 << 24)
+	}
+// generator, hashShort
+// G = generator(k)
+//
+// generator expands the key k into p words (see expand). Each word is reduced
+// modulo q and shifted down by q12 to give a coefficient of G.
+func generator(G []Fq, k []byte) {
+	L := make([]uint32, 4*p)
+	expand(L, k)
+	for i := 0; i < p; i++ {
+		G[i] = Fq(internal.Uint32ModUint14(L[i], q) - q12)
+	}
+// out = hashShort(r)
+//
+// hashShort derives a short polynomial deterministically from r. r is
+// bit-packed, hashed with prefix byte 5, the digest is expanded into p words
+// (see expand), and the words are passed to shortFromList.
+func hashShort(out []small, r Inputs) {
+	s := make([]byte, inputsBytes)
+	inputsEncode(s, r)
+	h := make([]byte, hashBytes)
+	L := make([]uint32, 4*p)
+	L_int32 := make([]int32, p)
+	hashPrefix(h, 5, s, len(s))
+	expand(L, h)
+	// convert []uint32 to []int32
+	for i := 0; i < p; i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// NTRU LPRime expand
+// (S,A),a = xKeyGen()
+//
+// xKeyGen copies the first seedBytes of seed into S, derives G from S with
+// generator, and passes the rest of seed to keyGen to produce A and a.
+//
+// NOTE: generator is given the whole of S, not S[:seedBytes]; expand only
+// reads its first 32 bytes.
+func xKeyGen(S []byte, A []Fq, a []small, seed []byte) {
+	copy(S, seed[:seedBytes])
+	seed = seed[seedBytes:]
+	G := make([]Fq, p)
+	generator(G, S)
+	keyGen(A, a, G, seed)
+// B,T = xEncrypt(r,(S,A))
+//
+// xEncrypt regenerates G from the seed S, derives the short polynomial b from
+// r with hashShort, and calls encrypt. r must not be longer than I entries,
+// otherwise the copy into the Inputs array panics.
+func xEncrypt(B []Fq, T []int8, r []int8, S []byte, A []Fq) {
+	G := make([]Fq, p)
+	generator(G, S)
+	b := make([]small, p)
+	// convert []int8 to Inputs
+	var r_inputs Inputs
+	for i := 0; i < len(r); i++ {
+		r_inputs[i] = r[i]
+	}
+	hashShort(b, r_inputs)
+	encrypt(B, T, r, G, A, b)
+// Encoding small polynomials (including short polynomials)
+// Transform polynomial in R to bytes
+// these are the only functions that rely on p mod 4 = 1 */
+//
+// smallEncode stores each coefficient as the 2-bit value f+1, four per byte
+// with the first in the low bits. After p/4 full bytes, the one remaining
+// coefficient is written alone into a final byte.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Trailing coefficient (p mod 4 = 1).
+	x = f[index] + 1
+	s[0] = byte(x)
+// Transform bytes into polynomial in R
+//
+// smallDecode reverses smallEncode. Each of the first p/4 bytes yields four
+// coefficients, taken as (2-bit field) - 1 from the low bits upward. The low
+// two bits of the next byte yield the final coefficient. A field value of 3
+// decodes to 2; no validation is done here.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	// Trailing coefficient (p mod 4 = 1).
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding rounded polynomials
+// Transform rounded polynomials to bytes
+//
+// roundedEncode maps each coefficient r[i] to ((r[i]+q12)*10923) >> 15 and
+// packs the p results with internal.Encode, every modulus being (q+2)/3.
+// 10923 is close to 2^15/3, so the multiply-shift presumably stands in for a
+// division by 3 — TODO confirm over the full input range.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// Transform bytes to rounded polynomials
+//
+// roundedDecode unpacks p values from s with internal.Decode, every modulus
+// being (q+2)/3, and maps each value R back to the coefficient R*3 - q12.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		// Computed in uint16; the conversion to Fq reinterprets a wrapped
+		// result as the intended negative value.
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Encoding top polynomials
+//
+// topEncode packs two entries of T into each of the topBytes output bytes:
+// T[2i] in the low nibble and T[2i+1] in the high nibble.
+func topEncode(s []byte, T []int8) {
+	for i := 0; i < topBytes; i++ {
+		s[i] = byte(T[2*i] + (T[2*i+1] << 4))
+	}
+// topDecode reverses topEncode: each of the topBytes input bytes yields T[2i]
+// from its low nibble and T[2i+1] from its high nibble.
+func topDecode(T []int8, s []byte) {
+	for i := 0; i < topBytes; i++ {
+		T[2*i] = int8(s[i] & 15)
+		T[2*i+1] = int8(s[i] >> 4)
+	}
+// NTRU LPRime Core plus encoding
+//
+// inputsRandom unpacks the I bits of seed into r, one bit per entry: r[i] is
+// bit i&7 of seed[i>>3]. It draws no randomness itself; seed must hold at
+// least I/8 bytes.
+func inputsRandom(r *Inputs, seed []byte) {
+	for i := 0; i < I; i++ {
+		r[i] = int8(1 & (seed[i>>3] >> (i & 7)))
+	}
+// Generates public key and private key
+// pk,sk = zKeyGen()
+//
+// zKeyGen writes the encoded public key to pk (the seed copied in by xKeyGen,
+// followed by the rounded encoding of A) and the encoded secret polynomial a
+// to sk.
+func zKeyGen(pk []byte, sk []byte, seed []byte) {
+	A := make([]Fq, p)
+	a := make([]small, p)
+	xKeyGen(pk, A, a, seed)
+	pk = pk[seedBytes:]
+	roundedEncode(pk, A)
+	smallEncode(sk, a)
+// c = zEncrypt(r,pk)
+//
+// zEncrypt decodes A from pk, encrypts r with xEncrypt, and writes the result
+// to c: the rounded encoding of B (roundedBytes) followed by the nibble
+// encoding of T.
+func zEncrypt(c []byte, r Inputs, pk []byte) {
+	A := make([]Fq, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	roundedDecode(A, pk[seedBytes:])
+	xEncrypt(B, T, r[:], pk[:seedBytes], A)
+	roundedEncode(c, B)
+	c = c[roundedBytes:]
+	topEncode(c, T)
+// r = zDecrypt(C,sk)
+//
+// zDecrypt decodes the secret polynomial from the start of sk and B and T
+// from c, then recovers r with decrypt.
+func zDecrypt(r *Inputs, c []byte, sk []byte) {
+	a := make([]small, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	smallDecode(a, sk)
+	roundedDecode(B, c)
+	topDecode(T, c[roundedBytes:])
+	decrypt(r[:], B, T, a)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, inputsBytes+hashBytes)
+	copy(x, r)
+	copy(x[inputsBytes:], cache)
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, inputsBytes+ciphertextsBytes+confirmBytes)
+	copy(x[:inputsBytes], y)
+	copy(x[inputsBytes:], z)
+	hashPrefix(k, b, x, len(x))
+// NTRU LPRime
+// pk,sk = kemKeyGen()
+//
+// kemKeyGen writes a key pair to pk and sk. A nil seed is replaced by
+// KeySeedSize bytes from crypto/rand. Any other seed must be exactly
+// KeySeedSize bytes long, otherwise the function panics.
+//
+// sk is laid out as four consecutive segments: the encoded secret polynomial
+// (secretKeysBytes), a copy of pk (publicKeysBytes), the last inputsBytes of
+// the seed, and the prefix-4 hash of pk.
+func kemKeyGen(pk []byte, sk []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, KeySeedSize)
+		// A failed read would leave the seed zeroed or only partly filled;
+		// refuse to derive a key from it.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != KeySeedSize {
+		panic("seed must be of length KeySeedSize")
+	}
+	zKeyGen(pk, sk, seed[:seedBytes+p*4])
+	seed = seed[seedBytes+p*4:]
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	copy(sk[:inputsBytes], seed)
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+//
+// hide bit-packs r into r_enc, encrypts r under pk into the first
+// ciphertextsBytes of c, and writes the confirmation hash of r_enc and cache
+// after it. inputsEncode ORs bits into r_enc, so r_enc is expected to be
+// zeroed on entry.
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	inputsEncode(r_enc, r)
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// Takes as input a public key
+// Returns ciphertext and shared key
+// c,k = encap(pk)
+//
+// EncapsulateTo writes a ciphertext to c and the matching shared key to k.
+// A nil seed is replaced by EncapsulationSeedSize bytes from crypto/rand.
+// With a caller-supplied seed the result is deterministic.
+//
+// It panics if seed, c or k does not have exactly the expected length
+// (EncapsulationSeedSize, CiphertextSize and SharedKeySize respectively), or
+// if reading from crypto/rand fails.
+func (pk PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		// A failed read would leave the seed zeroed or only partly filled;
+		// refuse to encapsulate with it.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	var r Inputs
+	// cache = Hash4(pk); it is folded into the confirmation hash by hide.
+	hashPrefix(cache, 4, pk.pk[:], publicKeysBytes)
+	inputsRandom(&r, seed)
+	hide(c, r_enc, r, pk.pk[:], cache)
+	hashSession(k, 1, r_enc, c)
+// Returns 0 if matching ciphertext+confirm, else -1
+//
+// ciphertextsDiffMask compares the first ciphertextsBytes+confirmBytes bytes
+// of c and c2. All byte differences are OR-ed together and the mask is
+// derived arithmetically, so no branch depends on the contents.
+func ciphertextsDiffMask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	n := ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is at most 255, so (differentbits-1)>>8 has its low bit
+	// set only when differentbits was 0 and the subtraction wrapped. The
+	// final "- 1" is done in int: in uint16 it would wrap to 65535, not -1.
+	return int(1&((differentbits-1)>>8)) - 1
+// Returns shared key from ciphertext and private key
+// k = decap(c,sk)
+//
+// DecapsulateTo writes the session key for ciphertext ct to ss. It panics
+// unless len(ct) == CiphertextSize and len(ss) == SharedKeySize.
+//
+// The private key is read as four consecutive segments: the encoded secret
+// polynomial, the public key, rho, and the cached hash of the public key.
+func (priv *PrivateKey) DecapsulateTo(ss []byte, ct []byte) {
+	if len(ct) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(ss) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	// Decrypt, then re-encrypt the recovered input to validate ct.
+	zDecrypt(&r, ct, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	// mask is 0 when ct equals the re-encryption and non-zero otherwise.
+	var mask int = ciphertextsDiffMask(ct, cnew)
+	// With mask == 0 r_enc is left unchanged. With the low eight bits of mask
+	// set, each byte of r_enc becomes the matching byte of rho. No branch
+	// depends on mask.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	// hashPrefix keeps only byte(1+mask) as the prefix byte: 1 for a valid
+	// ciphertext.
+	hashSession(ss, 1+mask, r_enc, ct)
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+//
+// PrivateKey holds the packed private key in that layout.
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey holds the packed public key: the generator seed followed by the
+// rounded encoding of A, as written by zKeyGen.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme is the stateless kem.Scheme implementation for this instance.
+type scheme struct{}
+// sch is the single shared scheme value returned by every Scheme accessor.
+var sch kem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+// The accessors below report the scheme name and the package-level size
+// constants.
+func (*scheme) Name() string               { return "ntrulpr1013" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the scheme the key belongs to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a *PrivateKey of this package holding the
+// same packed bytes as sk.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	if oth, ok := other.(*PrivateKey); ok {
+		return bytes.Equal(sk.sk[:], oth.sk[:])
+	}
+	return false
+// Equal reports whether other is a *PublicKey of this package holding the
+// same packed bytes as pk.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	if oth, ok := other.(*PublicKey); ok {
+		return bytes.Equal(pk.pk[:], oth.pk[:])
+	}
+	return false
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary()
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], seed)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate produces a ciphertext and shared secret for pk with a seed
+// drawn inside EncapsulateTo. It returns kem.ErrTypeMismatch when pk is not a
+// *PublicKey of this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically produces the ciphertext and shared secret
+// determined by pk and seed. The seed length is checked first
+// (kem.ErrSeedSize), then the key type (kem.ErrTypeMismatch).
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	if len(seed) != EncapsulationSeedSize {
+		return nil, nil, kem.ErrSeedSize
+	}
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate returns the shared secret for ct under sk. The key type is
+// checked first (kem.ErrTypeMismatch), then the ciphertext length
+// (kem.ErrCiphertextSize).
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	shared := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(shared, ct)
+	return shared, nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize unless buf is exactly PublicKeySize bytes long; the
+// contents are not otherwise validated.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	key := new(PublicKey)
+	copy(key.pk[:], buf)
+	return key, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize unless buf is exactly PrivateKeySize bytes long; the
+// contents are not otherwise validated.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	key := new(PrivateKey)
+	copy(key.sk[:], buf)
+	return key, nil
diff --git a/kem/ntruprime/ntrulpr1277/ntruprime.go b/kem/ntruprime/ntrulpr1277/ntruprime.go
new file mode 100644
index 000000000..8634ea739
--- /dev/null
+++ b/kem/ntruprime/ntrulpr1277/ntruprime.go
@@ -0,0 +1,832 @@
+// Code generated from ntrulpr.templ.go. DO NOT EDIT.
+// Package ntrulpr1277 implements the IND-CCA2 secure key encapsulation mechanism
+// ntrulpr1277 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package ntrulpr1277
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/ntrulpr1277"
+const (
+	p            = ntrup.P // length of the coefficient slices allocated throughout this file
+	q            = ntrup.Q // modulus passed to the reductions in fqFreeze and generator
+	q12          = ((q - 1) / 2) // Fq values are kept centred in -q12...q12
+	roundedBytes = ntrup.RoundedBytes // offset of the top-encoded part inside a ciphertext (see zEncrypt)
+	w    = ntrup.W // shortFromList clears the low bit of the first w list entries before sorting
+	tau0 = ntrup.Tau0 // offset added by top
+	tau1 = ntrup.Tau1 // multiplier used by top
+	tau2 = ntrup.Tau2 // offset subtracted by right
+	tau3 = ntrup.Tau3 // multiplier used by right
+	I = ntrup.I // number of input bits (length of Inputs)
+	hashBytes = 32 // size of the buffers that receive hashPrefix output
+	smallBytes = ((p + 3) / 4) // smallEncode packs four coefficients per byte
+	inputsBytes      = I / 8 // inputsEncode packs eight input bits per byte
+	seedBytes        = 32 // length of the generator seed stored at the front of the public key
+	ciphertextsBytes = roundedBytes + topBytes // rounded-encoded B followed by top-encoded T
+	secretKeysBytes  = smallBytes // small-encoded secret polynomial
+	publicKeysBytes  = seedBytes + roundedBytes // seed followed by rounded-encoded A
+	confirmBytes = 32 // length of the confirmation hash that follows the ciphertext body
+	tau      = 16 // NOTE(review): not referenced by the code visible in this file; confirm it is still needed
+	topBytes = I / 2 // topEncode packs two values per byte
+const (
+	// KeySeedSize is the seed length required by DeriveKeyPair: the generator seed, 4 bytes per coefficient and inputsBytes of rho.
+	KeySeedSize = seedBytes + p*4 + inputsBytes
+	// EncapsulationSeedSize is the seed length required by EncapsulateTo and EncapsulateDeterministically.
+	EncapsulationSeedSize = inputsBytes
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key (the ciphertext).
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+type (
+	small int8 // coefficient of a small polynomial, kept as -1, 0 or 1
+	Fq    int16 // element of GF(q), kept in the range -q12...q12
+// Arithmetic over GF(3).
+// Elements of F3, like the coefficients of a polynomial of R, are always
+// stored as -1, 0 or 1, so converting them to integers is a no-op.
+//
+// f3Freeze reduces x modulo 3 into that representation.
+// x must not be close to the top of the int16 range.
+func f3Freeze(x int16) small {
+	shifted := int32(x) + 1
+	residue := internal.Int32ModUint14(shifted, 3)
+	return small(residue) - 1
+// Arithmetic modulo q.
+// Elements of GF(q) are always stored in the range -q12...q12, so
+// converting them to integers is a no-op.
+//
+// fqFreeze reduces x modulo q into that range.
+// x must not be close to the top of the int32 range.
+func fqFreeze(x int32) Fq {
+	residue := internal.Int32ModUint14(x+q12, q)
+	return Fq(residue - q12)
+// top compresses the field element C: it adds tau0, multiplies by tau1 and
+// divides by 2^15 with rounding (add 16384, shift right by 15).
+func top(C Fq) int8 {
+	shifted := int32(C + tau0)
+	scaled := tau1*shifted + 16384
+	return int8(scaled >> 15)
+// right maps a value produced by top back to a field element:
+// tau3*T - tau2, reduced into -q12...q12.
+func right(T int8) Fq {
+	expanded := tau3*int32(T) - tau2
+	return fqFreeze(expanded)
+// Polynomials mod q.
+//
+// rqMultSmall sets h = f*g in the ring Rq. The full product with 2p-1
+// coefficients is built first; then every coefficient of index i >= p is
+// folded back by adding it to the coefficients of index i-p and i-p+1.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	prod := make([]Fq, p+p-1)
+	for i := range prod {
+		// Only indices j with both j and i-j inside [0, p) contribute.
+		lo, hi := 0, i
+		if i >= p {
+			lo, hi = i-p+1, p-1
+		}
+		var acc Fq
+		for j := lo; j <= hi; j++ {
+			acc = fqFreeze(int32(acc) + int32(f[j])*int32(g[i-j]))
+		}
+		prod[i] = acc
+	}
+	for i := p + p - 2; i >= p; i-- {
+		prod[i-p] = fqFreeze(int32(prod[i-p] + prod[i]))
+		prod[i-p+1] = fqFreeze(int32(prod[i-p+1] + prod[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = prod[i]
+	}
+// Rounded polynomials mod q.
+//
+// round writes to out the coefficients of a rounded to a multiple of 3:
+// each coefficient has its residue modulo 3 (as -1, 0 or 1) subtracted.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		coeff := a[i]
+		residue := f3Freeze(int16(coeff))
+		out[i] = coeff - Fq(residue)
+	}
+// minmax overwrites *x with min(*x, *y) and *y with max(*x, *y); the swap is done with masks instead of a branch so it executes in constant time.
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi // bits in which the two values differ
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31 // c is now 0 or 1; 1 selects a swap
+	c = -c // 0 stays 0, 1 becomes the all-ones mask
+	c &= xy
+	*x = xi ^ c // xi ^ (xi ^ yi) == yi when the mask is set
+	*y = yi ^ c
+// cryptoSortUint32 sorts x[0:n] in place using minmax compare-exchanges; which pairs are compared depends only on n, never on the values.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	top := 1 // local stride; shadows the package-level func top inside this function
+	for top < n-top {
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 { // p and q below are local strides that shadow the package constants
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 {
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// Sorting to generate short polynomial
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function.
+//
+// hashPrefix hashes, with SHA-512, the byte b followed by an inlen-byte
+// buffer holding in (truncated or zero-padded to inlen), and copies the
+// first 32 bytes of the digest into out.
+// For example, b = 0 means out = Hash0(in).
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	msg := make([]byte, 1+inlen)
+	msg[0] = byte(b)
+	copy(msg[1:], in)
+	digest := sha512.Sum512(msg)
+	copy(out, digest[:32])
+// Higher level randomness
+// Returns a random unsigned integer
+// generator can be passed for deterministic number generation
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom generates a short polynomial in out.
+//
+// The polynomial is derived from 4*p bytes: each group of four bytes is
+// decoded by urandom32 and the resulting list is handed to shortFromList.
+// When seed is non-nil its first 4*p bytes are used, which makes the result
+// deterministic. When seed is nil the bytes are drawn from crypto/rand;
+// previously this path passed nil to urandom32 and panicked.
+//
+// It panics if a non-nil seed is shorter than 4*p bytes or if the system
+// random source fails.
+func shortRandom(out []small, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	list := make([]int32, p)
+	for i := 0; i < p; i++ {
+		list[i] = int32(urandom32(seed[i*4 : i*4+4]))
+	}
+	shortFromList(out, list)
+// NTRU LPRime Core.
+//
+// keyGen computes (G, A), a = keyGen(G): the short secret a is produced by
+// shortRandom from seed and A is the rounding of a*G. G is left unchanged.
+func keyGen(A []Fq, a []small, G []Fq, seed []byte) {
+	shortRandom(a, seed)
+	product := make([]Fq, p)
+	rqMultSmall(product, G, a)
+	round(A, product)
+// encrypt computes B, T = encrypt(r, (G, A), b): B is the rounding of b*G
+// and, for each of the I input bits, T[i] = top(bA[i] + r[i]*q12) where
+// bA = b*A.
+func encrypt(B []Fq, T []int8, r []int8, G []Fq, A []Fq, b []small) {
+	product := make([]Fq, p)
+	rqMultSmall(product, G, b)
+	round(B, product)
+	// The scratch buffer is free again once B has been rounded.
+	rqMultSmall(product, A, b)
+	for i := 0; i < I; i++ {
+		masked := int32(product[i]) + int32(r[i])*q12
+		T[i] = top(fqFreeze(masked))
+	}
+// decrypt computes r = decrypt((B, T), a). For each of the I positions it
+// reduces right(T[i]) - aB[i] + 4w + 1 (with aB = a*B) into -q12...q12 and
+// stores the negated result of internal.Int16NegativeMask on that value.
+func decrypt(r []int8, B []Fq, T []int8, a []small) {
+	aB := make([]Fq, p)
+	rqMultSmall(aB, B, a)
+	for i := 0; i < I; i++ {
+		diff := fqFreeze(int32(right(T[i])) - int32(aB[i]) + 4*w + 1)
+		r[i] = int8(-internal.Int16NegativeMask(int16(diff)))
+	}
+// Encoding I-bit inputs.
+//
+// Inputs holds the I input bits, one bit per element.
+type Inputs [I]int8
+// inputsEncode packs the I bits of r into the first inputsBytes bytes of s,
+// eight bits per byte, lowest bit first. The destination bytes are cleared
+// before the bits are ORed in, so the result does not depend on what s held
+// before the call.
+func inputsEncode(s []byte, r Inputs) {
+	for i := 0; i < inputsBytes; i++ {
+		s[i] = 0
+	}
+	for i := 0; i < I; i++ {
+		s[i>>3] |= byte(r[i] << (i & 7))
+	}
+// expand fills L[0:p] with little-endian 32-bit words of AES-CTR keystream keyed by k[:32] with an all-zero IV; callers pass len(L) = 4*p.
+func expand(L []uint32, k []byte) {
+	temp := make([]byte, len(L)) // all-zero plaintext, one byte per element of L
+	ciphertext := make([]byte, aes.BlockSize+len(temp)) // the first block stays zero and serves as the IV
+	block, err := aes.NewCipher(k[:32])
+	if err != nil {
+		panic(err)
+	}
+	stream := cipher.NewCTR(block, ciphertext[:aes.BlockSize])
+	stream.XORKeyStream(ciphertext[aes.BlockSize:], temp) // XOR with zeros leaves the raw keystream
+	ciphertext = ciphertext[aes.BlockSize:]
+	// widen every keystream byte into its own element of L
+	for i := 0; i < len(temp); i++ {
+		L[i] = uint32(ciphertext[i])
+	}
+	for i := 0; i < p; i++ { // pack elements 4i..4i+3 into word i; in place is safe because 4*i >= i
+		var L0 uint32 = L[4*i]
+		var L1 uint32 = L[4*i+1]
+		var L2 uint32 = L[4*i+2]
+		var L3 uint32 = L[4*i+3]
+		L[i] = L0 + (L1 << 8) + (L2 << 16) + (L3 << 24)
+	}
+// generator, hashShort.
+//
+// generator computes G = generator(k): expand(k) yields p 32-bit words and
+// each word is reduced modulo q and shifted down by q12.
+func generator(G []Fq, k []byte) {
+	words := make([]uint32, 4*p)
+	expand(words, k)
+	for i, word := range words[:p] {
+		G[i] = Fq(internal.Uint32ModUint14(word, q) - q12)
+	}
+// hashShort computes out = hashShort(r): the encoded inputs are hashed with
+// prefix 5, the digest is expanded into p words, and shortFromList turns
+// those words into a short polynomial.
+func hashShort(out []small, r Inputs) {
+	encoded := make([]byte, inputsBytes)
+	inputsEncode(encoded, r)
+	digest := make([]byte, hashBytes)
+	hashPrefix(digest, 5, encoded, len(encoded))
+	words := make([]uint32, 4*p)
+	expand(words, digest)
+	list := make([]int32, p)
+	for i := range list {
+		list[i] = int32(words[i])
+	}
+	shortFromList(out, list)
+// NTRU LPRime expand.
+//
+// xKeyGen computes (S, A), a = xKeyGen(): the first seedBytes bytes of seed
+// are copied into S, G is generated from S, and the remainder of seed is
+// handed to keyGen.
+func xKeyGen(S []byte, A []Fq, a []small, seed []byte) {
+	copy(S, seed[:seedBytes])
+	G := make([]Fq, p)
+	generator(G, S)
+	keyGen(A, a, G, seed[seedBytes:])
+// xEncrypt computes B, T = xEncrypt(r, (S, A)): G is regenerated from S and
+// the short polynomial b is hashShort(r); both are passed to encrypt.
+func xEncrypt(B []Fq, T []int8, r []int8, S []byte, A []Fq) {
+	// hashShort takes an Inputs array, so copy the bits over.
+	var bits Inputs
+	for i, bit := range r {
+		bits[i] = bit
+	}
+	b := make([]small, p)
+	hashShort(b, bits)
+	G := make([]Fq, p)
+	generator(G, S)
+	encrypt(B, T, r, G, A, b)
+// Encoding small polynomials (including short polynomials).
+//
+// smallEncode transforms the polynomial f in R to bytes. Coefficients are
+// packed four per byte, each stored as coefficient+1 in two bits with the
+// first coefficient in the lowest bits; the final coefficient gets a byte
+// of its own. smallEncode and smallDecode are the only functions that rely
+// on p mod 4 = 1.
+func smallEncode(s []byte, f []small) {
+	const full = p / 4
+	for i := 0; i < full; i++ {
+		c := f[4*i : 4*i+4]
+		s[i] = byte(c[0]+1) + byte(c[1]+1)<<2 + byte(c[2]+1)<<4 + byte(c[3]+1)<<6
+	}
+	s[full] = byte(f[4*full] + 1)
+// smallDecode transforms bytes into a polynomial in R, reversing
+// smallEncode: each of the first p/4 bytes yields four coefficients (two
+// bits each, minus one, lowest bits first) and the following byte yields
+// the last coefficient.
+func smallDecode(f []small, s []byte) {
+	const full = p / 4
+	for i := 0; i < full; i++ {
+		packed := s[i]
+		for j := 0; j < 4; j++ {
+			f[4*i+j] = small(packed&3) - 1
+			packed >>= 2
+		}
+	}
+	f[4*full] = small(s[full]&3) - 1
+// Encoding rounded polynomials.
+//
+// roundedEncode transforms the rounded polynomial r to bytes. Each
+// coefficient is shifted up by q12 and scaled by 10923/2^15 (roughly a
+// division by 3) before internal.Encode packs the values using the radix
+// (q+2)/3 for every position.
+func roundedEncode(s []byte, r []Fq) {
+	digits := make([]uint16, p)
+	radix := make([]uint16, p)
+	for i := range digits {
+		radix[i] = (q + 2) / 3
+		digits[i] = uint16((int32(r[i]+q12) * 10923) >> 15)
+	}
+	internal.Encode(s, digits, radix, p)
+// roundedDecode transforms bytes to a rounded polynomial: internal.Decode
+// recovers p values using the radix (q+2)/3 for every position, and each
+// value d becomes the coefficient 3*d - q12.
+func roundedDecode(r []Fq, s []byte) {
+	radix := make([]uint16, p)
+	for i := range radix {
+		radix[i] = (q + 2) / 3
+	}
+	digits := make([]uint16, p)
+	internal.Decode(digits, s, radix, p)
+	for i, d := range digits {
+		r[i] = Fq(d*3 - q12)
+	}
+// Encoding top polynomials.
+//
+// topEncode packs the values of T two per byte into s[0:topBytes]: the
+// even-indexed value plus the odd-indexed value shifted left by four bits.
+func topEncode(s []byte, T []int8) {
+	for i := 0; i < topBytes; i++ {
+		lo, hi := T[2*i], T[2*i+1]
+		s[i] = byte(lo + (hi << 4))
+	}
+// topDecode unpacks the first topBytes bytes of s into T: the low four bits
+// of byte i go to T[2*i] and the high four bits to T[2*i+1].
+func topDecode(T []int8, s []byte) {
+	for i, packed := range s[:topBytes] {
+		T[2*i] = int8(packed & 15)
+		T[2*i+1] = int8(packed >> 4)
+	}
+// NTRU LPRime Core plus encoding.
+//
+// inputsRandom unpacks the first I bits of seed into r, taking the lowest
+// bit of each byte first. The bits come entirely from seed.
+func inputsRandom(r *Inputs, seed []byte) {
+	for i := range r {
+		r[i] = int8((seed[i>>3] >> (i & 7)) & 1)
+	}
+// zKeyGen generates a public key and a private key, pk, sk = zKeyGen().
+// pk receives the seed S followed by the rounded encoding of A; sk receives
+// the small encoding of the secret polynomial a.
+func zKeyGen(pk []byte, sk []byte, seed []byte) {
+	a := make([]small, p)
+	A := make([]Fq, p)
+	xKeyGen(pk, A, a, seed)
+	smallEncode(sk, a)
+	roundedEncode(pk[seedBytes:], A)
+// zEncrypt computes c = zEncrypt(r, pk): pk is split into the seed S and
+// the rounded encoding of A, r is encrypted with xEncrypt, and c receives
+// the rounded encoding of B followed by the top encoding of T.
+func zEncrypt(c []byte, r Inputs, pk []byte) {
+	S, encodedA := pk[:seedBytes], pk[seedBytes:]
+	A := make([]Fq, p)
+	roundedDecode(A, encodedA)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	xEncrypt(B, T, r[:], S, A)
+	roundedEncode(c, B)
+	topEncode(c[roundedBytes:], T)
+// zDecrypt computes r = zDecrypt(c, sk): it decodes the secret polynomial
+// from sk, B from the start of c and T from c[roundedBytes:], then
+// decrypts into r.
+func zDecrypt(r *Inputs, c []byte, sk []byte) {
+	T := make([]int8, I)
+	topDecode(T, c[roundedBytes:])
+	B := make([]Fq, p)
+	roundedDecode(B, c)
+	a := make([]small, p)
+	smallDecode(a, sk)
+	decrypt(r[:], B, T, a)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, inputsBytes+hashBytes)
+	copy(x, r)
+	copy(x[inputsBytes:], cache)
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, inputsBytes+ciphertextsBytes+confirmBytes)
+	copy(x[:inputsBytes], y)
+	copy(x[inputsBytes:], z)
+	hashPrefix(k, b, x, len(x))
+// NTRU LPRime KEM.
+//
+// kemKeyGen generates a key pair, pk, sk = kemKeyGen().
+//
+// If seed is nil, KeySeedSize bytes are drawn from crypto/rand; otherwise
+// seed makes key generation deterministic. The packed private key written
+// to sk consists of the small-encoded secret polynomial, a copy of the
+// public key, the last inputsBytes bytes of the seed, and the prefix-4
+// hash of the public key.
+//
+// It panics if seed is non-nil and not KeySeedSize bytes long, or if the
+// system random source fails.
+func kemKeyGen(pk []byte, sk []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, KeySeedSize)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			// Never derive a key from a seed that was not fully randomised.
+			panic(err)
+		}
+	}
+	if len(seed) != KeySeedSize {
+		panic("seed must be of length KeySeedSize")
+	}
+	zKeyGen(pk, sk, seed[:seedBytes+p*4])
+	seed = seed[seedBytes+p*4:]
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	copy(sk[:inputsBytes], seed)
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide computes c, r_enc = hide(r, pk, cache), where cache is Hash4(pk):
+// r_enc is the encoding of r, the first ciphertextsBytes bytes of c are the
+// encryption of r under pk, and the confirmation hash follows them.
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	inputsEncode(r_enc, r)
+	zEncrypt(c, r, pk)
+	confirm := c[ciphertextsBytes:]
+	hashConfirm(confirm, r_enc, pk, cache)
+// EncapsulateTo generates a shared key for the public key pk, writing the
+// ciphertext to c and the shared key to k (c, k = encap(pk)).
+//
+// If seed is nil, EncapsulationSeedSize bytes are drawn from crypto/rand;
+// otherwise seed makes the operation deterministic.
+//
+// It panics if seed, c or k is not of length EncapsulationSeedSize,
+// CiphertextSize or SharedKeySize respectively, or if the system random
+// source fails.
+func (pk PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			// Never encapsulate with inputs that were not fully randomised.
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	var r Inputs
+	hashPrefix(cache, 4, pk.pk[:], publicKeysBytes)
+	inputsRandom(&r, seed)
+	hide(c, r_enc, r, pk.pk[:], cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertextsDiffMask compares the first ciphertextsBytes+confirmBytes
+// bytes of c and c2 without branching on their contents. It returns 0 if
+// they are identical and -1 otherwise.
+func ciphertextsDiffMask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	for i := 0; i < ciphertextsBytes+confirmBytes; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is at most 255, so (differentbits-1)>>8 has its low bit
+	// set only when differentbits is 0 (the subtraction wraps to 0xFFFF).
+	// Convert to int before subtracting 1: in uint16 the subtraction would
+	// wrap a mismatch to 65535 instead of the documented -1.
+	equal := int(1 & ((differentbits - 1) >> 8))
+	return equal - 1
+// DecapsulateTo computes the shared key for the ciphertext ct under this private key and writes it to ss (k = decap(c,sk)).
+// It panics if ct is not CiphertextSize bytes long or ss is not SharedKeySize bytes long.
+func (priv *PrivateKey) DecapsulateTo(ss []byte, ct []byte) {
+	if len(ct) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(ss) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:] // embedded public key and everything stored after it
+	rho := pk[publicKeysBytes:] // seed bytes stored by kemKeyGen; substituted for r_enc when ct is rejected
+	cache := rho[inputsBytes:] // prefix-4 hash of the public key stored by kemKeyGen
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, ct, sk)
+	hide(cnew, r_enc, r, pk, cache) // re-encrypt the decrypted inputs
+	var mask int = ciphertextsDiffMask(ct, cnew) // zero only when ct equals the re-encryption
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i])) // keeps r_enc on a match, yields rho[i] otherwise
+	}
+	hashSession(ss, 1+mask, r_enc, ct) // hashPrefix truncates 1+mask to a byte: 1 on a match, 0 otherwise
+// PrivateKey is a packed private key made of four consecutive segments: the small-encoded secret polynomial,
+// the public key, inputsBytes of seed material and the prefix-4 hash of the public key (see kemKeyGen).
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+type PublicKey struct {
+	pk [PublicKeySize]byte // packed public key: the generator seed followed by the rounded encoding of A
+type scheme struct{}
+var sch kem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+func (*scheme) Name() string               { return "ntrulpr1277" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a *PrivateKey of this package holding the
+// same packed bytes as sk.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	oth, ok := other.(*PrivateKey)
+	return ok && bytes.Equal(sk.sk[:], oth.sk[:])
+// Equal reports whether other is a *PublicKey of this package holding the
+// same packed bytes as pk.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	oth, ok := other.(*PublicKey)
+	return ok && bytes.Equal(pk.pk[:], oth.pk[:])
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary()
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], seed)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate generates a fresh shared key for pk and returns it together
+// with the ciphertext that transports it. It returns kem.ErrTypeMismatch
+// if pk is not a *PublicKey of this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically is Encapsulate with the randomness replaced
+// by seed. It returns kem.ErrSeedSize if seed is not EncapsulationSeedSize
+// bytes long and kem.ErrTypeMismatch if pk is not a *PublicKey of this
+// package; the seed length is checked first.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	switch {
+	case len(seed) != EncapsulationSeedSize:
+		return nil, nil, kem.ErrSeedSize
+	case !ok:
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate recovers the shared key carried by ct using sk. It returns
+// kem.ErrTypeMismatch if sk is not a *PrivateKey of this package and
+// kem.ErrCiphertextSize if ct is not CiphertextSize bytes long.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	shared := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(shared, ct)
+	return shared, nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize if buf is not PublicKeySize bytes long; the contents
+// are not validated further.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pub := new(PublicKey)
+	copy(pub.pk[:], buf)
+	return pub, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize if buf is not PrivateKeySize bytes long; the contents
+// are not validated further.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	priv := new(PrivateKey)
+	copy(priv.sk[:], buf)
+	return priv, nil
diff --git a/kem/ntruprime/ntrulpr653/ntruprime.go b/kem/ntruprime/ntrulpr653/ntruprime.go
new file mode 100644
index 000000000..758f14313
--- /dev/null
+++ b/kem/ntruprime/ntrulpr653/ntruprime.go
@@ -0,0 +1,832 @@
+// Code generated from ntrulpr.templ.go. DO NOT EDIT.
+// Package ntrulpr653 implements the IND-CCA2 secure key encapsulation mechanism
+// ntrulpr653 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package ntrulpr653
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/ntrulpr653"
+const (
+	p            = ntrup.P
+	q            = ntrup.Q
+	q12          = ((q - 1) / 2)
+	roundedBytes = ntrup.RoundedBytes
+	w    = ntrup.W
+	tau0 = ntrup.Tau0
+	tau1 = ntrup.Tau1
+	tau2 = ntrup.Tau2
+	tau3 = ntrup.Tau3
+	I = ntrup.I
+	hashBytes = 32
+	smallBytes = ((p + 3) / 4)
+	inputsBytes      = I / 8
+	seedBytes        = 32
+	ciphertextsBytes = roundedBytes + topBytes
+	secretKeysBytes  = smallBytes
+	publicKeysBytes  = seedBytes + roundedBytes
+	confirmBytes = 32
+	tau      = 16
+	topBytes = I / 2
+const (
+	// KeySeedSize is the seed length required by DeriveKeyPair: the generator seed, 4 bytes per coefficient and inputsBytes of rho.
+	KeySeedSize = seedBytes + p*4 + inputsBytes
+	// EncapsulationSeedSize is the seed length required by EncapsulateTo and EncapsulateDeterministically.
+	EncapsulationSeedSize = inputsBytes
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key (the ciphertext).
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+type (
+	small int8
+	Fq    int16
+// arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in (-1,0,1)
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+// x must not be close to top int16
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+/* ----- arithmetic mod q */
+// GF (q)
+// type Fq int16
+/* always represented as -q12...q12 */
+/* so ZZ_fromFq is a no-op */
+/* x must not be close to top int32 */
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+func top(C Fq) int8 {
+	return int8((tau1*(int32)(C+tau0) + 16384) >> 15)
+func right(T int8) Fq {
+	return fqFreeze(tau3*int32(T) - tau2)
+// Polynomials mod q
+// h = f*g in the ring Rq */
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// Rounding all coefficients of a polynomial to the nearest multiple of 3
+// Rounded polynomials mod q
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i])))
+	}
+// Returns (min(x, y), max(x, y)), executes in constant time
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	c = -c
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// Sorts the array of unsigned integers
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	top := 1
+	for top < n-top {
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 {
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 {
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// Sorting to generate short polynomial
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function.
+//
+// hashPrefix hashes, with SHA-512, the byte b followed by an inlen-byte
+// buffer holding in (truncated or zero-padded to inlen), and copies the
+// first 32 bytes of the digest into out.
+// For example, b = 0 means out = Hash0(in).
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	msg := make([]byte, 1+inlen)
+	msg[0] = byte(b)
+	copy(msg[1:], in)
+	digest := sha512.Sum512(msg)
+	copy(out, digest[:32])
+// Higher level randomness
+// Returns a random unsigned integer
+// generator can be passed for deterministic number generation
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom generates a short polynomial in out.
+//
+// The polynomial is derived from 4*p bytes: each group of four bytes is
+// decoded by urandom32 and the resulting list is handed to shortFromList.
+// When seed is non-nil its first 4*p bytes are used, which makes the result
+// deterministic. When seed is nil the bytes are drawn from crypto/rand;
+// previously this path passed nil to urandom32 and panicked.
+//
+// It panics if a non-nil seed is shorter than 4*p bytes or if the system
+// random source fails.
+func shortRandom(out []small, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	list := make([]int32, p)
+	for i := 0; i < p; i++ {
+		list[i] = int32(urandom32(seed[i*4 : i*4+4]))
+	}
+	shortFromList(out, list)
+// NTRU LPRime Core
+// (G,A),a = keyGen(G); leaves G unchanged
+func keyGen(A []Fq, a []small, G []Fq, seed []byte) {
+	aG := make([]Fq, p)
+	shortRandom(a, seed)
+	rqMultSmall(aG, G, a)
+	round(A, aG)
+// B,T = encrypt(r,(G,A),b)
+func encrypt(B []Fq, T []int8, r []int8, G []Fq, A []Fq, b []small) {
+	bG := make([]Fq, p)
+	bA := make([]Fq, p)
+	rqMultSmall(bG, G, b)
+	round(B, bG)
+	rqMultSmall(bA, A, b)
+	for i := 0; i < I; i++ {
+		T[i] = top(fqFreeze(int32(bA[i]) + int32(r[i])*q12))
+	}
+// r = decrypt((B,T),a)
+func decrypt(r []int8, B []Fq, T []int8, a []small) {
+	aB := make([]Fq, p)
+	rqMultSmall(aB, B, a)
+	for i := 0; i < I; i++ {
+		r[i] = int8(-internal.Int16NegativeMask(int16(fqFreeze(int32(right(T[i])) - int32(aB[i]) + 4*w + 1))))
+	}
+// Encoding I-bit inputs.
+//
+// Inputs holds the I input bits, one bit per element.
+type Inputs [I]int8
+// inputsEncode packs the I bits of r into the first inputsBytes bytes of s,
+// eight bits per byte, lowest bit first. The destination bytes are cleared
+// before the bits are ORed in, so the result does not depend on what s held
+// before the call.
+func inputsEncode(s []byte, r Inputs) {
+	for i := 0; i < inputsBytes; i++ {
+		s[i] = 0
+	}
+	for i := 0; i < I; i++ {
+		s[i>>3] |= byte(r[i] << (i & 7))
+	}
+// Expand
+func expand(L []uint32, k []byte) {
+	temp := make([]byte, len(L)) // plaintext to be encrypted. Should be of the same size as L (4*P)
+	ciphertext := make([]byte, aes.BlockSize+len(temp))
+	block, err := aes.NewCipher(k[:32])
+	if err != nil {
+		panic(err)
+	}
+	stream := cipher.NewCTR(block, ciphertext[:aes.BlockSize])
+	stream.XORKeyStream(ciphertext[aes.BlockSize:], temp)
+	ciphertext = ciphertext[aes.BlockSize:]
+	// convert byte to uint32
+	for i := 0; i < len(temp); i++ {
+		L[i] = uint32(ciphertext[i])
+	}
+	for i := 0; i < p; i++ {
+		var L0 uint32 = L[4*i]
+		var L1 uint32 = L[4*i+1]
+		var L2 uint32 = L[4*i+2]
+		var L3 uint32 = L[4*i+3]
+		L[i] = L0 + (L1 << 8) + (L2 << 16) + (L3 << 24)
+	}
+// generator, hashShort
+// G = generator(k)
+func generator(G []Fq, k []byte) {
+	L := make([]uint32, 4*p)
+	expand(L, k)
+	for i := 0; i < p; i++ {
+		G[i] = Fq(internal.Uint32ModUint14(L[i], q) - q12)
+	}
+// out = hashShort(r)
+func hashShort(out []small, r Inputs) {
+	s := make([]byte, inputsBytes)
+	inputsEncode(s, r)
+	h := make([]byte, hashBytes)
+	L := make([]uint32, 4*p)
+	L_int32 := make([]int32, p)
+	hashPrefix(h, 5, s, len(s))
+	expand(L, h)
+	// convert []uint32 to []int32
+	for i := 0; i < p; i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// NTRU LPRime expand
+// (S,A),a = xKeyGen()
+func xKeyGen(S []byte, A []Fq, a []small, seed []byte) {
+	copy(S, seed[:seedBytes])
+	seed = seed[seedBytes:]
+	G := make([]Fq, p)
+	generator(G, S)
+	keyGen(A, a, G, seed)
+// B,T = xEncrypt(r,(S,A))
+func xEncrypt(B []Fq, T []int8, r []int8, S []byte, A []Fq) {
+	G := make([]Fq, p)
+	generator(G, S)
+	b := make([]small, p)
+	// convert []int8 to Inputs
+	var r_inputs Inputs
+	for i := 0; i < len(r); i++ {
+		r_inputs[i] = r[i]
+	}
+	hashShort(b, r_inputs)
+	encrypt(B, T, r, G, A, b)
+// Encoding small polynomials (including short polynomials)
+// Transform polynomial in R to bytes
+// these are the only functions that rely on p mod 4 = 1 */
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	x = f[index] + 1
+	s[0] = byte(x)
+// Transform bytes into polynomial in R
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding rounded polynomials
+// Transform rounded polynomials to bytes
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// Transform bytes to rounded polynomials
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Encoding top polynomials
+func topEncode(s []byte, T []int8) {
+	for i := 0; i < topBytes; i++ {
+		s[i] = byte(T[2*i] + (T[2*i+1] << 4))
+	}
+func topDecode(T []int8, s []byte) {
+	for i := 0; i < topBytes; i++ {
+		T[2*i] = int8(s[i] & 15)
+		T[2*i+1] = int8(s[i] >> 4)
+	}
+// Streamlined NTRU Prime Core plus encoding
+func inputsRandom(r *Inputs, seed []byte) {
+	for i := 0; i < I; i++ {
+		r[i] = int8(1 & (seed[i>>3] >> (i & 7)))
+	}
+// Generates public key and private key
+// pk,sk = zKeyGen()
+func zKeyGen(pk []byte, sk []byte, seed []byte) {
+	A := make([]Fq, p)
+	a := make([]small, p)
+	xKeyGen(pk, A, a, seed)
+	pk = pk[seedBytes:]
+	roundedEncode(pk, A)
+	smallEncode(sk, a)
+// c = zEncrypt(r,pk)
+func zEncrypt(c []byte, r Inputs, pk []byte) {
+	A := make([]Fq, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	roundedDecode(A, pk[seedBytes:])
+	xEncrypt(B, T, r[:], pk[:seedBytes], A)
+	roundedEncode(c, B)
+	c = c[roundedBytes:]
+	topEncode(c, T)
+// r = zDecrypt(C,sk)
+func zDecrypt(r *Inputs, c []byte, sk []byte) {
+	a := make([]small, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	smallDecode(a, sk)
+	roundedDecode(B, c)
+	topDecode(T, c[roundedBytes:])
+	decrypt(r[:], B, T, a)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, inputsBytes+hashBytes)
+	copy(x, r)
+	copy(x[inputsBytes:], cache)
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, inputsBytes+ciphertextsBytes+confirmBytes)
+	copy(x[:inputsBytes], y)
+	copy(x[inputsBytes:], z)
+	hashPrefix(k, b, x, len(x))
+// NTRU LPRime KEM.
+//
+// kemKeyGen generates a key pair, pk, sk = kemKeyGen().
+//
+// If seed is nil, KeySeedSize bytes are drawn from crypto/rand; otherwise
+// seed makes key generation deterministic. The packed private key written
+// to sk consists of the small-encoded secret polynomial, a copy of the
+// public key, the last inputsBytes bytes of the seed, and the prefix-4
+// hash of the public key.
+//
+// It panics if seed is non-nil and not KeySeedSize bytes long, or if the
+// system random source fails.
+func kemKeyGen(pk []byte, sk []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, KeySeedSize)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			// Never derive a key from a seed that was not fully randomised.
+			panic(err)
+		}
+	}
+	if len(seed) != KeySeedSize {
+		panic("seed must be of length KeySeedSize")
+	}
+	zKeyGen(pk, sk, seed[:seedBytes+p*4])
+	seed = seed[seedBytes+p*4:]
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	copy(sk[:inputsBytes], seed)
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	inputsEncode(r_enc, r)
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo generates a shared key for the public key pk, writing the
+// ciphertext to c and the shared key to k (c, k = encap(pk)).
+//
+// If seed is nil, EncapsulationSeedSize bytes are drawn from crypto/rand;
+// otherwise seed makes the operation deterministic.
+//
+// It panics if seed, c or k is not of length EncapsulationSeedSize,
+// CiphertextSize or SharedKeySize respectively, or if the system random
+// source fails.
+func (pk PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			// Never encapsulate with inputs that were not fully randomised.
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	var r Inputs
+	hashPrefix(cache, 4, pk.pk[:], publicKeysBytes)
+	inputsRandom(&r, seed)
+	hide(c, r_enc, r, pk.pk[:], cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertextsDiffMask compares the first ciphertextsBytes+confirmBytes
+// bytes of c and c2 without branching on their contents. It returns 0 if
+// they are identical and -1 otherwise.
+func ciphertextsDiffMask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	for i := 0; i < ciphertextsBytes+confirmBytes; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is at most 255, so (differentbits-1)>>8 has its low bit
+	// set only when differentbits is 0 (the subtraction wraps to 0xFFFF).
+	// Convert to int before subtracting 1: in uint16 the subtraction would
+	// wrap a mismatch to 65535 instead of the documented -1.
+	equal := int(1 & ((differentbits - 1) >> 8))
+	return equal - 1
+// Returns shared key from ciphertext and private key
+// k = decap(c,sk)
+func (priv *PrivateKey) DecapsulateTo(ss []byte, ct []byte) {
+	if len(ct) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(ss) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, ct, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertextsDiffMask(ct, cnew)
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	hashSession(ss, 1+mask, r_enc, ct)
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+type PublicKey struct {
+	pk [PublicKeySize]byte
+type scheme struct{}
+var sch kem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+func (*scheme) Name() string               { return "ntrulpr653" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	oth, ok := other.(*PrivateKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(sk.sk[:], oth.sk[:])
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	oth, ok := other.(*PublicKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(pk.pk[:], oth.pk[:])
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary()
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], seed)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	if len(seed) != EncapsulationSeedSize {
+		return nil, nil, kem.ErrSeedSize
+	}
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	ssk, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := [SharedKeySize]byte{}
+	ssk.DecapsulateTo(ss[:], ct)
+	return ss[:], nil
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pk := [PublicKeySize]byte{}
+	copy(pk[:], buf)
+	return &PublicKey{pk: pk}, nil
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	sk := [PrivateKeySize]byte{}
+	copy(sk[:], buf)
+	return &PrivateKey{sk: sk}, nil
diff --git a/kem/ntruprime/ntrulpr761/ntruprime.go b/kem/ntruprime/ntrulpr761/ntruprime.go
new file mode 100644
index 000000000..214afc58b
--- /dev/null
+++ b/kem/ntruprime/ntrulpr761/ntruprime.go
@@ -0,0 +1,832 @@
+// Code generated from ntrulpr.templ.go. DO NOT EDIT.
+// Package ntrulpr761 implements the IND-CCA2 secure key encapsulation mechanism
+// ntrulpr761 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package ntrulpr761
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/ntrulpr761"
+// Instance parameters (taken from the pke package) and the byte sizes derived
+// from them.
+const (
+	p            = ntrup.P
+	q            = ntrup.Q
+	// q12 bounds the centred representation -q12..q12 used for Fq values.
+	q12          = ((q - 1) / 2)
+	roundedBytes = ntrup.RoundedBytes
+	// w is the number of leading list entries that shortFromList forces to an
+	// even value before sorting.
+	w    = ntrup.W
+	tau0 = ntrup.Tau0
+	tau1 = ntrup.Tau1
+	tau2 = ntrup.Tau2
+	tau3 = ntrup.Tau3
+	// I is the number of input bits held by an Inputs value.
+	I = ntrup.I
+	// hashBytes is the size of the truncated hash values used in this package.
+	hashBytes = 32
+	// smallBytes is the size of a small polynomial packed four coefficients
+	// per byte.
+	smallBytes = ((p + 3) / 4)
+	inputsBytes      = I / 8
+	seedBytes        = 32
+	ciphertextsBytes = roundedBytes + topBytes
+	secretKeysBytes  = smallBytes
+	publicKeysBytes  = seedBytes + roundedBytes
+	confirmBytes = 32
+	tau      = 16
+	// topBytes is the size of I top values packed two per byte.
+	topBytes = I / 2
+const (
+	// KeySeedSize is the size of the seed accepted by DeriveKeyPair: the
+	// generator seed, four bytes per coefficient of the secret polynomial, and
+	// the rejection value stored in the private key.
+	KeySeedSize = seedBytes + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = inputsBytes
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+type (
+	// small holds a coefficient represented as -1, 0 or 1.
+	small int8
+	// Fq holds an element of GF(q), represented in the range -q12..q12.
+	Fq    int16
+// Arithmetic over GF(3).
+//
+// A polynomial of R has all of its coefficients in (-1,0,1); F3 is always
+// represented as -1,0,1, so ZZ_fromF3 is a no-op.
+//
+// f3Freeze reduces x modulo 3 into that representation: it shifts x by one,
+// reduces with internal.Int32ModUint14 (presumably into 0..2 — confirm
+// against the helper) and shifts back.
+// x must not be close to top int16.
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic mod q, i.e. in GF(q).
+//
+// Fq values are always represented as -q12...q12, so ZZ_fromFq is a no-op.
+//
+// fqFreeze reduces x modulo q into that centred range by shifting by q12,
+// reducing with internal.Int32ModUint14 and shifting back.
+// x must not be close to top int32.
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// top maps a field element to its compressed "top" value, computed as
+// (tau1*(C+tau0) + 16384) >> 15. topEncode stores these values in four bits.
+func top(C Fq) int8 {
+	return int8((tau1*(int32)(C+tau0) + 16384) >> 15)
+// right maps a top value back to a field element, tau3*T - tau2 reduced mod q.
+func right(T int8) Fq {
+	return fqFreeze(tau3*int32(T) - tau2)
+// Polynomials mod q.
+//
+// rqMultSmall computes h = f*g in the ring Rq, where g is a small polynomial.
+// f, g and h must hold at least p coefficients.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	// fg receives the full product, of degree up to 2p-2, before reduction.
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	// Coefficients 0..p-1 of the product; each partial sum is reduced mod q.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients p..2p-2 of the product.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Fold the high coefficients down using x^p = x + 1: the coefficient of
+	// x^i is added to those of x^(i-p) and x^(i-p+1).
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// Rounded polynomials mod q.
+//
+// round writes to out the polynomial a with every coefficient rounded to the
+// nearest multiple of 3, by subtracting the coefficient's residue in -1,0,1.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i])))
+	}
+// minmax stores min(*x, *y) in *x and max(*x, *y) in *y. It contains no
+// data-dependent branch, so that it executes in constant time.
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	// Correct the top bit of the difference so that it flags yi < xi.
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	// Turn that top bit into an all-zeros or all-ones mask.
+	c >>= 31
+	c = -c
+	// Keep xi^yi under the mask: XORing it into both values swaps them.
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order using a
+// fixed sequence of compare-exchange steps (minmax) that depends only on n.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// start is the largest power of two strictly below n.
+	start := 1
+	for start < n-start {
+		start <<= 1
+	}
+	for gap := start; gap > 0; gap >>= 1 {
+		for i := 0; i < n-gap; i++ {
+			if i&gap != 0 {
+				continue
+			}
+			minmax(&x[i], &x[i+gap])
+		}
+		for span := start; span > gap; span >>= 1 {
+			for i := 0; i < n-span; i++ {
+				if i&gap != 0 {
+					continue
+				}
+				minmax(&x[i+gap], &x[i+span])
+			}
+		}
+	}
+// shortFromList derives a short polynomial from the p values in in by sorting.
+//
+// The first w values get their lowest bit cleared and the remaining p-w
+// values get their two lowest bits set to 01. After sorting, the two lowest
+// bits of each entry minus one give the coefficient, so exactly w
+// coefficients are non-zero.
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	// -2 and -3 converted to uint32 are the masks 0xFFFFFFFE and 0xFFFFFFFD.
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// hashPrefix is the underlying hash function. It computes the SHA-512 digest
+// of the byte b followed by inlen bytes of in and copies the first 32 bytes
+// of the digest to out, e.g. b = 0 means out = Hash0(in).
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	msg := make([]byte, 1+inlen)
+	msg[0] = byte(b)
+	copy(msg[1:], in)
+	digest := sha512.Sum512(msg)
+	copy(out, digest[:32])
+// Higher level randomness
+// Returns a random unsigned integer
+// generator can be passed for deterministic number generation
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom writes a random short polynomial to out.
+//
+// Four bytes of seed are consumed per coefficient, so a non-nil seed must
+// hold at least 4*p bytes. If seed is nil, the bytes are drawn from
+// crypto/rand instead; the function panics if that source fails.
+func shortRandom(out []small, seed []byte) {
+	if seed == nil {
+		// urandom32 indexes its argument, so a nil seed cannot be passed
+		// through to it; replace it with fresh randomness first.
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	L := make([]int32, p)
+	for i := range L {
+		L[i] = int32(urandom32(seed[4*i : 4*i+4]))
+	}
+	shortFromList(out, L)
+// NTRU LPRime Core.
+//
+// keyGen samples the short secret a from seed and computes A = round(a*G);
+// G is left unchanged.
+// (G,A),a = keyGen(G)
+func keyGen(A []Fq, a []small, G []Fq, seed []byte) {
+	aG := make([]Fq, p)
+	shortRandom(a, seed)
+	rqMultSmall(aG, G, a)
+	round(A, aG)
+// encrypt computes B = round(b*G) and, for each of the I input bits,
+// T[i] = top(coefficient i of b*A + r[i]*q12).
+// B,T = encrypt(r,(G,A),b)
+func encrypt(B []Fq, T []int8, r []int8, G []Fq, A []Fq, b []small) {
+	bG := make([]Fq, p)
+	bA := make([]Fq, p)
+	rqMultSmall(bG, G, b)
+	round(B, bG)
+	rqMultSmall(bA, A, b)
+	for i := 0; i < I; i++ {
+		T[i] = top(fqFreeze(int32(bA[i]) + int32(r[i])*q12))
+	}
+// decrypt recovers the I input bits from (B,T) with the secret a. Bit i is
+// taken from the sign of right(T[i]) - (a*B)[i] + 4w + 1 after reduction
+// mod q (presumably 1 when negative — confirm against
+// internal.Int16NegativeMask).
+// r = decrypt((B,T),a)
+func decrypt(r []int8, B []Fq, T []int8, a []small) {
+	aB := make([]Fq, p)
+	rqMultSmall(aB, B, a)
+	for i := 0; i < I; i++ {
+		r[i] = int8(-internal.Int16NegativeMask(int16(fqFreeze(int32(right(T[i])) - int32(aB[i]) + 4*w + 1))))
+	}
+// Encoding I-bit inputs.
+//
+// Inputs holds I input bits, one bit (0 or 1) per entry.
+type Inputs [I]int8
+// inputsEncode packs the I bits of r into the first inputsBytes bytes of s,
+// least significant bit first within each byte.
+func inputsEncode(s []byte, r Inputs) {
+	// The bits are ORed in below, so the destination must start out zeroed;
+	// clear it here rather than relying on the caller to pass a fresh buffer.
+	for i := 0; i < inputsBytes; i++ {
+		s[i] = 0
+	}
+	for i := 0; i < I; i++ {
+		s[i>>3] |= byte(r[i] << (i & 7))
+	}
+// expand fills L with pseudorandom words derived from the 32-byte key k.
+//
+// It generates len(L) bytes of AES-256 keystream in CTR mode with an all-zero
+// initial counter block, then combines each group of four bytes into one
+// little-endian uint32, leaving the p results in L[0..p-1]. L must hold 4*p
+// entries.
+func expand(L []uint32, k []byte) {
+	temp := make([]byte, len(L)) // all-zero plaintext to be encrypted; same length as L (4*p)
+	ciphertext := make([]byte, aes.BlockSize+len(temp))
+	block, err := aes.NewCipher(k[:32])
+	if err != nil {
+		panic(err)
+	}
+	// The first block of ciphertext is still all zero and serves as the IV.
+	stream := cipher.NewCTR(block, ciphertext[:aes.BlockSize])
+	stream.XORKeyStream(ciphertext[aes.BlockSize:], temp)
+	ciphertext = ciphertext[aes.BlockSize:]
+	// convert byte to uint32
+	for i := 0; i < len(temp); i++ {
+		L[i] = uint32(ciphertext[i])
+	}
+	// Pack in place: entry i is written only after entries 4i..4i+3 are read.
+	for i := 0; i < p; i++ {
+		var L0 uint32 = L[4*i]
+		var L1 uint32 = L[4*i+1]
+		var L2 uint32 = L[4*i+2]
+		var L3 uint32 = L[4*i+3]
+		L[i] = L0 + (L1 << 8) + (L2 << 16) + (L3 << 24)
+	}
+// generator expands the seed k into the polynomial G, reducing each expanded
+// word mod q and centring it by subtracting q12.
+// G = generator(k)
+func generator(G []Fq, k []byte) {
+	L := make([]uint32, 4*p)
+	expand(L, k)
+	for i := 0; i < p; i++ {
+		G[i] = Fq(internal.Uint32ModUint14(L[i], q) - q12)
+	}
+// hashShort deterministically derives a short polynomial from the inputs r:
+// the encoded inputs are hashed with prefix 5, the hash is expanded, and the
+// resulting words are turned into a short polynomial by shortFromList.
+// out = hashShort(r)
+func hashShort(out []small, r Inputs) {
+	s := make([]byte, inputsBytes)
+	inputsEncode(s, r)
+	h := make([]byte, hashBytes)
+	L := make([]uint32, 4*p)
+	L_int32 := make([]int32, p)
+	hashPrefix(h, 5, s, len(s))
+	expand(L, h)
+	// convert []uint32 to []int32
+	for i := 0; i < p; i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// NTRU LPRime expand.
+//
+// xKeyGen copies the first seedBytes bytes of seed to S, expands S into the
+// generator G and runs keyGen with the remainder of seed.
+// (S,A),a = xKeyGen()
+func xKeyGen(S []byte, A []Fq, a []small, seed []byte) {
+	copy(S, seed[:seedBytes])
+	seed = seed[seedBytes:]
+	G := make([]Fq, p)
+	generator(G, S)
+	keyGen(A, a, G, seed)
+// xEncrypt regenerates G from the seed S, derives the short polynomial b from
+// r with hashShort and encrypts r under (G,A). r must not hold more than I
+// entries.
+// B,T = xEncrypt(r,(S,A))
+func xEncrypt(B []Fq, T []int8, r []int8, S []byte, A []Fq) {
+	G := make([]Fq, p)
+	generator(G, S)
+	b := make([]small, p)
+	// convert []int8 to Inputs
+	var r_inputs Inputs
+	for i := 0; i < len(r); i++ {
+		r_inputs[i] = r[i]
+	}
+	hashShort(b, r_inputs)
+	encrypt(B, T, r, G, A, b)
+// Encoding small polynomials (including short polynomials).
+// smallEncode and smallDecode are the only functions that rely on p mod 4 = 1.
+//
+// smallEncode packs the p coefficients of f into s. Each coefficient is
+// shifted from -1..1 to 0..2 and stored in two bits, four coefficients per
+// byte, lowest bits first; the last coefficient gets a byte of its own.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Remaining coefficient (p mod 4 = 1).
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode is the inverse of smallEncode: it unpacks p coefficients from
+// s into f, reading two bits per coefficient and subtracting one.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	// Remaining coefficient (p mod 4 = 1).
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding rounded polynomials.
+//
+// roundedEncode transforms the rounded polynomial r to bytes. Each
+// coefficient is shifted by q12 and divided by 3 (multiplying by 10923 and
+// shifting right by 15 approximates 1/3), then the p values are packed by
+// internal.Encode with modulus (q+2)/3 each.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode transforms bytes back to a rounded polynomial: it unpacks p
+// values with internal.Decode (modulus (q+2)/3 each) and maps each value R to
+// the coefficient 3*R - q12.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Encoding top polynomials.
+//
+// topEncode packs the I values of T into topBytes bytes of s, two values per
+// byte with the even-indexed value in the low four bits.
+func topEncode(s []byte, T []int8) {
+	for i := 0; i < topBytes; i++ {
+		s[i] = byte(T[2*i] + (T[2*i+1] << 4))
+	}
+// topDecode is the inverse of topEncode: it unpacks two four-bit values from
+// each of the topBytes bytes of s into T.
+func topDecode(T []int8, s []byte) {
+	for i := 0; i < topBytes; i++ {
+		T[2*i] = int8(s[i] & 15)
+		T[2*i+1] = int8(s[i] >> 4)
+	}
+// NTRU LPRime expand plus encoding.
+//
+// inputsRandom unpacks the first I bits of seed into r, one bit per entry,
+// least significant bit first within each byte.
+func inputsRandom(r *Inputs, seed []byte) {
+	for i := 0; i < I; i++ {
+		r[i] = int8(1 & (seed[i>>3] >> (i & 7)))
+	}
+// zKeyGen generates an encoded public key and an encoded secret polynomial.
+// pk receives the generator seed followed by the rounded encoding of A; sk
+// receives the small encoding of a.
+// pk,sk = zKeyGen()
+func zKeyGen(pk []byte, sk []byte, seed []byte) {
+	A := make([]Fq, p)
+	a := make([]small, p)
+	xKeyGen(pk, A, a, seed)
+	pk = pk[seedBytes:]
+	roundedEncode(pk, A)
+	smallEncode(sk, a)
+// zEncrypt encrypts the inputs r under the encoded public key pk and writes
+// the rounded encoding of B followed by the top encoding of T to c.
+// c = zEncrypt(r,pk)
+func zEncrypt(c []byte, r Inputs, pk []byte) {
+	A := make([]Fq, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	roundedDecode(A, pk[seedBytes:])
+	xEncrypt(B, T, r[:], pk[:seedBytes], A)
+	roundedEncode(c, B)
+	c = c[roundedBytes:]
+	topEncode(c, T)
+// zDecrypt decodes the secret polynomial from sk and (B,T) from c, then
+// decrypts them into the inputs r.
+// r = zDecrypt(C,sk)
+func zDecrypt(r *Inputs, c []byte, sk []byte) {
+	a := make([]small, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	smallDecode(a, sk)
+	roundedDecode(B, c)
+	topDecode(T, c[roundedBytes:])
+	decrypt(r[:], B, T, a)
+// Confirmation hash.
+//
+// hashConfirm writes to h the hash, with prefix 2, of the encoded inputs r
+// followed by cache, where cache is Hash4(pk). The pk argument itself is not
+// read.
+// h = hashConfirm(r,pk,cache)
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, inputsBytes+hashBytes)
+	copy(x, r)
+	copy(x[inputsBytes:], cache)
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash.
+//
+// hashSession writes to k the hash, with prefix b, of the encoded inputs y
+// followed by the ciphertext (including its confirmation hash) z.
+// k = hashSession(b,y,z)
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, inputsBytes+ciphertextsBytes+confirmBytes)
+	copy(x[:inputsBytes], y)
+	copy(x[inputsBytes:], z)
+	hashPrefix(k, b, x, len(x))
+// NTRU LPRime KEM.
+//
+// kemKeyGen generates a key pair into pk and sk. If seed is nil, KeySeedSize
+// random bytes are drawn from crypto/rand; otherwise seed must be exactly
+// KeySeedSize bytes long. It panics on a wrong seed length or if the random
+// source fails.
+//
+// The private key is laid out as: encoded secret polynomial, public key,
+// the last inputsBytes bytes of the seed (used when decapsulation rejects a
+// ciphertext), and Hash4(pk).
+// pk,sk = kemKeyGen()
+func kemKeyGen(pk []byte, sk []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, KeySeedSize)
+		// A failed read must not be ignored: it would leave an all-zero
+		// seed and therefore a predictable key pair.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != KeySeedSize {
+		panic("seed must be of length KeySeedSize")
+	}
+	zKeyGen(pk, sk, seed[:seedBytes+p*4])
+	seed = seed[seedBytes+p*4:]
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	copy(sk[:inputsBytes], seed)
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide encodes r into r_enc, encrypts r under pk into the first
+// ciphertextsBytes bytes of c and appends the confirmation hash; cache is
+// Hash4(pk).
+// c,r_enc = hide(r,pk,cache)
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	inputsEncode(r_enc, r)
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo generates a shared key for pk, writing the ciphertext to c
+// and the shared key to k.
+//
+// If seed is nil, EncapsulationSeedSize random bytes are drawn from
+// crypto/rand. It panics if seed, c or k do not have length
+// EncapsulationSeedSize, CiphertextSize and SharedKeySize respectively, or if
+// the random source fails.
+// c,k = encap(pk)
+func (pk PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		// A failed read must not be ignored: it would leave an all-zero
+		// seed and therefore a predictable shared key.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	var r Inputs
+	hashPrefix(cache, 4, pk.pk[:], publicKeysBytes)
+	inputsRandom(&r, seed)
+	hide(c, r_enc, r, pk.pk[:], cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertextsDiffMask compares the first ciphertextsBytes+confirmBytes bytes
+// of c and c2 without branching on their contents. It returns 0 if they
+// match and -1 otherwise.
+func ciphertextsDiffMask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	for i := 0; i < ciphertextsBytes+confirmBytes; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is at most 0xFF, so differentbits-1 has bit 8 set only
+	// when differentbits is 0 (it wraps to 0xFFFF). Widen to int before
+	// subtracting so that a mismatch yields -1 rather than 0xFFFF.
+	return int(1&((differentbits-1)>>8)) - 1
+// DecapsulateTo computes the shared key for the ciphertext ct and writes it
+// to ss. It panics if ct is not CiphertextSize bytes or ss is not
+// SharedKeySize bytes long.
+//
+// The decrypted inputs are re-encrypted and compared with ct. On a mismatch
+// the session key is derived from the value rho stored in the private key
+// instead of the decrypted inputs, and with hash prefix 0 instead of 1.
+// k = decap(c,sk)
+func (priv *PrivateKey) DecapsulateTo(ss []byte, ct []byte) {
+	if len(ct) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(ss) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	// Split the private key into its segments (see kemKeyGen).
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, ct, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertextsDiffMask(ct, cnew)
+	// Replace r_enc by rho when mask is set, without branching on mask.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	// hashPrefix keeps only the low byte of 1+mask: 1 on a match, 0 otherwise.
+	hashSession(ss, 1+mask, r_enc, ct)
+// PrivateKey is a private key of this scheme. Its packed form consists of the
+// following segments: the encoded secret polynomial, the public key, entropy
+// used when a ciphertext is rejected, and the hash of the public key.
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey is a public key of this scheme in packed form: the generator seed
+// followed by the encoded rounded polynomial.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme implements kem.Scheme for this instance; sch is its only value.
+type scheme struct{}
+var sch kem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+// The methods below report the instance name and its fixed sizes in bytes.
+func (*scheme) Name() string               { return "ntrulpr761" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the scheme the key belongs to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a private key of this scheme with the same
+// packed bytes.
+// NOTE(review): bytes.Equal gives no constant-time guarantee; consider a
+// constant-time comparison for secret material.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	if oth, ok := other.(*PrivateKey); ok {
+		return bytes.Equal(sk.sk[:], oth.sk[:])
+	}
+	return false
+// Equal reports whether other is a public key of this scheme with the same
+// packed bytes.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	if oth, ok := other.(*PublicKey); ok {
+		return bytes.Equal(pk.pk[:], oth.pk[:])
+	}
+	return false
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary()
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], seed)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate generates a fresh shared key ss for pk and returns it together
+// with its encapsulation ct. It returns kem.ErrTypeMismatch if pk does not
+// belong to this scheme.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically is like Encapsulate but takes its randomness
+// from seed. It returns kem.ErrSeedSize if seed is not EncapsulationSeedSize
+// bytes long, and kem.ErrTypeMismatch if pk does not belong to this scheme.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	if len(seed) != EncapsulationSeedSize {
+		return nil, nil, kem.ErrSeedSize
+	}
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate recovers the shared key encapsulated in ct using sk. It returns
+// kem.ErrTypeMismatch if sk does not belong to this scheme and
+// kem.ErrCiphertextSize if ct is not CiphertextSize bytes long.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	switch {
+	case !ok:
+		return nil, kem.ErrTypeMismatch
+	case len(ct) != CiphertextSize:
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(ss, ct)
+	return ss, nil
+// UnmarshalBinaryPublicKey parses a packed public key. It returns
+// kem.ErrPubKeySize if buf is not PublicKeySize bytes long.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	key := new(PublicKey)
+	copy(key.pk[:], buf)
+	return key, nil
+// UnmarshalBinaryPrivateKey parses a packed private key. It returns
+// kem.ErrPrivKeySize if buf is not PrivateKeySize bytes long.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	key := new(PrivateKey)
+	copy(key.sk[:], buf)
+	return key, nil
diff --git a/kem/ntruprime/ntrulpr857/ntruprime.go b/kem/ntruprime/ntrulpr857/ntruprime.go
new file mode 100644
index 000000000..d3f1543b5
--- /dev/null
+++ b/kem/ntruprime/ntrulpr857/ntruprime.go
@@ -0,0 +1,832 @@
+// Code generated from ntrulpr.templ.go. DO NOT EDIT.
+// Package ntrulpr857 implements the IND-CCA2 secure key encapsulation mechanism
+// ntrulpr857 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package ntrulpr857
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/ntrulpr857"
+// Instance parameters (taken from the pke package) and the byte sizes derived
+// from them.
+const (
+	p            = ntrup.P
+	q            = ntrup.Q
+	// q12 bounds the centred representation -q12..q12 used for Fq values.
+	q12          = ((q - 1) / 2)
+	roundedBytes = ntrup.RoundedBytes
+	// w is the number of leading list entries that shortFromList forces to an
+	// even value before sorting.
+	w    = ntrup.W
+	tau0 = ntrup.Tau0
+	tau1 = ntrup.Tau1
+	tau2 = ntrup.Tau2
+	tau3 = ntrup.Tau3
+	// I is the number of input bits held by an Inputs value.
+	I = ntrup.I
+	// hashBytes is the size of the truncated hash values used in this package.
+	hashBytes = 32
+	// smallBytes is the size of a small polynomial packed four coefficients
+	// per byte.
+	smallBytes = ((p + 3) / 4)
+	inputsBytes      = I / 8
+	seedBytes        = 32
+	ciphertextsBytes = roundedBytes + topBytes
+	secretKeysBytes  = smallBytes
+	publicKeysBytes  = seedBytes + roundedBytes
+	confirmBytes = 32
+	tau      = 16
+	// topBytes is the size of I top values packed two per byte.
+	topBytes = I / 2
+const (
+	// KeySeedSize is the size of the seed accepted by DeriveKeyPair: the
+	// generator seed, four bytes per coefficient of the secret polynomial, and
+	// the rejection value stored in the private key.
+	KeySeedSize = seedBytes + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = inputsBytes
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+type (
+	// small holds a coefficient represented as -1, 0 or 1.
+	small int8
+	// Fq holds an element of GF(q), represented in the range -q12..q12.
+	Fq    int16
+// Arithmetic over GF(3).
+//
+// A polynomial of R has all of its coefficients in (-1,0,1); F3 is always
+// represented as -1,0,1, so ZZ_fromF3 is a no-op.
+//
+// f3Freeze reduces x modulo 3 into that representation: it shifts x by one,
+// reduces with internal.Int32ModUint14 (presumably into 0..2 — confirm
+// against the helper) and shifts back.
+// x must not be close to top int16.
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic mod q, i.e. in GF(q).
+//
+// Fq values are always represented as -q12...q12, so ZZ_fromFq is a no-op.
+//
+// fqFreeze reduces x modulo q into that centred range by shifting by q12,
+// reducing with internal.Int32ModUint14 and shifting back.
+// x must not be close to top int32.
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// top maps a field element to its compressed "top" value, computed as
+// (tau1*(C+tau0) + 16384) >> 15. topEncode stores these values in four bits.
+func top(C Fq) int8 {
+	return int8((tau1*(int32)(C+tau0) + 16384) >> 15)
+// right maps a top value back to a field element, tau3*T - tau2 reduced mod q.
+func right(T int8) Fq {
+	return fqFreeze(tau3*int32(T) - tau2)
+// Polynomials mod q.
+//
+// rqMultSmall computes h = f*g in the ring Rq, where g is a small polynomial.
+// f, g and h must hold at least p coefficients.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	// fg receives the full product, of degree up to 2p-2, before reduction.
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	// Coefficients 0..p-1 of the product; each partial sum is reduced mod q.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients p..2p-2 of the product.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Fold the high coefficients down using x^p = x + 1: the coefficient of
+	// x^i is added to those of x^(i-p) and x^(i-p+1).
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// Rounded polynomials mod q.
+//
+// round writes to out the polynomial a with every coefficient rounded to the
+// nearest multiple of 3, by subtracting the coefficient's residue in -1,0,1.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i])))
+	}
+// minmax stores min(*x, *y) in *x and max(*x, *y) in *y. It contains no
+// data-dependent branch, so that it executes in constant time.
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	// Correct the top bit of the difference so that it flags yi < xi.
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	// Turn that top bit into an all-zeros or all-ones mask.
+	c >>= 31
+	c = -c
+	// Keep xi^yi under the mask: XORing it into both values swaps them.
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order using a
+// fixed sequence of compare-exchange steps (minmax) that depends only on n.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// start is the largest power of two strictly below n.
+	start := 1
+	for start < n-start {
+		start <<= 1
+	}
+	for gap := start; gap > 0; gap >>= 1 {
+		for i := 0; i < n-gap; i++ {
+			if i&gap != 0 {
+				continue
+			}
+			minmax(&x[i], &x[i+gap])
+		}
+		for span := start; span > gap; span >>= 1 {
+			for i := 0; i < n-span; i++ {
+				if i&gap != 0 {
+					continue
+				}
+				minmax(&x[i+gap], &x[i+span])
+			}
+		}
+	}
+// shortFromList derives a short polynomial from the p values in in by sorting.
+//
+// The first w values get their lowest bit cleared and the remaining p-w
+// values get their two lowest bits set to 01. After sorting, the two lowest
+// bits of each entry minus one give the coefficient, so exactly w
+// coefficients are non-zero.
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	// -2 and -3 converted to uint32 are the masks 0xFFFFFFFE and 0xFFFFFFFD.
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// hashPrefix is the underlying hash function. It computes the SHA-512 digest
+// of the byte b followed by inlen bytes of in and copies the first 32 bytes
+// of the digest to out, e.g. b = 0 means out = Hash0(in).
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	msg := make([]byte, 1+inlen)
+	msg[0] = byte(b)
+	copy(msg[1:], in)
+	digest := sha512.Sum512(msg)
+	copy(out, digest[:32])
+// Higher level randomness
+// Returns a random unsigned integer
+// generator can be passed for deterministic number generation
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom writes a random short polynomial to out.
+//
+// Four bytes of seed are consumed per coefficient, so a non-nil seed must
+// hold at least 4*p bytes. If seed is nil, the bytes are drawn from
+// crypto/rand instead; the function panics if that source fails.
+func shortRandom(out []small, seed []byte) {
+	if seed == nil {
+		// urandom32 indexes its argument, so a nil seed cannot be passed
+		// through to it; replace it with fresh randomness first.
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	L := make([]int32, p)
+	for i := range L {
+		L[i] = int32(urandom32(seed[4*i : 4*i+4]))
+	}
+	shortFromList(out, L)
+// NTRU LPRime Core.
+//
+// keyGen samples the short secret a from seed and computes A = round(a*G);
+// G is left unchanged.
+// (G,A),a = keyGen(G)
+func keyGen(A []Fq, a []small, G []Fq, seed []byte) {
+	aG := make([]Fq, p)
+	shortRandom(a, seed)
+	rqMultSmall(aG, G, a)
+	round(A, aG)
+// encrypt computes B = round(b*G) and, for each of the I input bits,
+// T[i] = top(coefficient i of b*A + r[i]*q12).
+// B,T = encrypt(r,(G,A),b)
+func encrypt(B []Fq, T []int8, r []int8, G []Fq, A []Fq, b []small) {
+	bG := make([]Fq, p)
+	bA := make([]Fq, p)
+	rqMultSmall(bG, G, b)
+	round(B, bG)
+	rqMultSmall(bA, A, b)
+	for i := 0; i < I; i++ {
+		T[i] = top(fqFreeze(int32(bA[i]) + int32(r[i])*q12))
+	}
+// decrypt recovers the I input bits from (B,T) with the secret a. Bit i is
+// taken from the sign of right(T[i]) - (a*B)[i] + 4w + 1 after reduction
+// mod q (presumably 1 when negative — confirm against
+// internal.Int16NegativeMask).
+// r = decrypt((B,T),a)
+func decrypt(r []int8, B []Fq, T []int8, a []small) {
+	aB := make([]Fq, p)
+	rqMultSmall(aB, B, a)
+	for i := 0; i < I; i++ {
+		r[i] = int8(-internal.Int16NegativeMask(int16(fqFreeze(int32(right(T[i])) - int32(aB[i]) + 4*w + 1))))
+	}
+// Encoding I-bit inputs.
+//
+// Inputs holds I input bits, one bit (0 or 1) per entry.
+type Inputs [I]int8
+// inputsEncode packs the I bits of r into the first inputsBytes bytes of s,
+// least significant bit first within each byte.
+func inputsEncode(s []byte, r Inputs) {
+	// The bits are ORed in below, so the destination must start out zeroed;
+	// clear it here rather than relying on the caller to pass a fresh buffer.
+	for i := 0; i < inputsBytes; i++ {
+		s[i] = 0
+	}
+	for i := 0; i < I; i++ {
+		s[i>>3] |= byte(r[i] << (i & 7))
+	}
+// expand fills L with pseudorandom words derived from the 32-byte key k.
+//
+// It generates len(L) bytes of AES-256 keystream in CTR mode with an all-zero
+// initial counter block, then combines each group of four bytes into one
+// little-endian uint32, leaving the p results in L[0..p-1]. L must hold 4*p
+// entries.
+func expand(L []uint32, k []byte) {
+	temp := make([]byte, len(L)) // all-zero plaintext to be encrypted; same length as L (4*p)
+	ciphertext := make([]byte, aes.BlockSize+len(temp))
+	block, err := aes.NewCipher(k[:32])
+	if err != nil {
+		panic(err)
+	}
+	// The first block of ciphertext is still all zero and serves as the IV.
+	stream := cipher.NewCTR(block, ciphertext[:aes.BlockSize])
+	stream.XORKeyStream(ciphertext[aes.BlockSize:], temp)
+	ciphertext = ciphertext[aes.BlockSize:]
+	// convert byte to uint32
+	for i := 0; i < len(temp); i++ {
+		L[i] = uint32(ciphertext[i])
+	}
+	// Pack in place: entry i is written only after entries 4i..4i+3 are read.
+	for i := 0; i < p; i++ {
+		var L0 uint32 = L[4*i]
+		var L1 uint32 = L[4*i+1]
+		var L2 uint32 = L[4*i+2]
+		var L3 uint32 = L[4*i+3]
+		L[i] = L0 + (L1 << 8) + (L2 << 16) + (L3 << 24)
+	}
+// generator expands the seed k into the polynomial G, reducing each expanded
+// word mod q and centring it by subtracting q12.
+// G = generator(k)
+func generator(G []Fq, k []byte) {
+	L := make([]uint32, 4*p)
+	expand(L, k)
+	for i := 0; i < p; i++ {
+		G[i] = Fq(internal.Uint32ModUint14(L[i], q) - q12)
+	}
+// hashShort deterministically derives a short polynomial from the inputs r:
+// the encoded inputs are hashed with prefix 5, the hash is expanded, and the
+// resulting words are turned into a short polynomial by shortFromList.
+// out = hashShort(r)
+func hashShort(out []small, r Inputs) {
+	s := make([]byte, inputsBytes)
+	inputsEncode(s, r)
+	h := make([]byte, hashBytes)
+	L := make([]uint32, 4*p)
+	L_int32 := make([]int32, p)
+	hashPrefix(h, 5, s, len(s))
+	expand(L, h)
+	// convert []uint32 to []int32
+	for i := 0; i < p; i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// NTRU LPRime expand.
+//
+// xKeyGen copies the first seedBytes bytes of seed to S, expands S into the
+// generator G and runs keyGen with the remainder of seed.
+// (S,A),a = xKeyGen()
+func xKeyGen(S []byte, A []Fq, a []small, seed []byte) {
+	copy(S, seed[:seedBytes])
+	seed = seed[seedBytes:]
+	G := make([]Fq, p)
+	generator(G, S)
+	keyGen(A, a, G, seed)
+// xEncrypt regenerates G from the seed S, derives the short polynomial b from
+// r with hashShort and encrypts r under (G,A). r must not hold more than I
+// entries.
+// B,T = xEncrypt(r,(S,A))
+func xEncrypt(B []Fq, T []int8, r []int8, S []byte, A []Fq) {
+	G := make([]Fq, p)
+	generator(G, S)
+	b := make([]small, p)
+	// convert []int8 to Inputs
+	var r_inputs Inputs
+	for i := 0; i < len(r); i++ {
+		r_inputs[i] = r[i]
+	}
+	hashShort(b, r_inputs)
+	encrypt(B, T, r, G, A, b)
+// Encoding small polynomials (including short polynomials).
+// smallEncode and smallDecode are the only functions that rely on p mod 4 = 1.
+//
+// smallEncode packs the p coefficients of f into s. Each coefficient is
+// shifted from -1..1 to 0..2 and stored in two bits, four coefficients per
+// byte, lowest bits first; the last coefficient gets a byte of its own.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Remaining coefficient (p mod 4 = 1).
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode is the inverse of smallEncode: it unpacks p coefficients from
+// s into f, reading two bits per coefficient and subtracting one.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	// Remaining coefficient (p mod 4 = 1).
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding rounded polynomials.
+//
+// roundedEncode transforms the rounded polynomial r to bytes. Each
+// coefficient is shifted by q12 and divided by 3 (multiplying by 10923 and
+// shifting right by 15 approximates 1/3), then the p values are packed by
+// internal.Encode with modulus (q+2)/3 each.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode transforms bytes back to a rounded polynomial: it unpacks p
+// values with internal.Decode (modulus (q+2)/3 each) and maps each value R to
+// the coefficient 3*R - q12.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Encoding top polynomials.
+//
+// topEncode packs the I values of T into topBytes bytes of s, two values per
+// byte with the even-indexed value in the low four bits.
+func topEncode(s []byte, T []int8) {
+	for i := 0; i < topBytes; i++ {
+		s[i] = byte(T[2*i] + (T[2*i+1] << 4))
+	}
+// topDecode is the inverse of topEncode: it unpacks two four-bit values from
+// each of the topBytes bytes of s into T.
+func topDecode(T []int8, s []byte) {
+	for i := 0; i < topBytes; i++ {
+		T[2*i] = int8(s[i] & 15)
+		T[2*i+1] = int8(s[i] >> 4)
+	}
+// NTRU LPRime expand plus encoding.
+//
+// inputsRandom unpacks the first I bits of seed into r, one bit per entry,
+// least significant bit first within each byte.
+func inputsRandom(r *Inputs, seed []byte) {
+	for i := 0; i < I; i++ {
+		r[i] = int8(1 & (seed[i>>3] >> (i & 7)))
+	}
+// zKeyGen generates an encoded public key and an encoded secret polynomial.
+// pk receives the generator seed followed by the rounded encoding of A; sk
+// receives the small encoding of a.
+// pk,sk = zKeyGen()
+func zKeyGen(pk []byte, sk []byte, seed []byte) {
+	A := make([]Fq, p)
+	a := make([]small, p)
+	xKeyGen(pk, A, a, seed)
+	pk = pk[seedBytes:]
+	roundedEncode(pk, A)
+	smallEncode(sk, a)
+// zEncrypt encrypts the inputs r under the encoded public key pk and writes
+// the rounded encoding of B followed by the top encoding of T to c.
+// c = zEncrypt(r,pk)
+func zEncrypt(c []byte, r Inputs, pk []byte) {
+	A := make([]Fq, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	roundedDecode(A, pk[seedBytes:])
+	xEncrypt(B, T, r[:], pk[:seedBytes], A)
+	roundedEncode(c, B)
+	c = c[roundedBytes:]
+	topEncode(c, T)
+// zDecrypt decodes the secret polynomial from sk and (B,T) from c, then
+// decrypts them into the inputs r.
+// r = zDecrypt(C,sk)
+func zDecrypt(r *Inputs, c []byte, sk []byte) {
+	a := make([]small, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	smallDecode(a, sk)
+	roundedDecode(B, c)
+	topDecode(T, c[roundedBytes:])
+	decrypt(r[:], B, T, a)
+// Confirmation hash.
+//
+// hashConfirm writes to h the hash, with prefix 2, of the encoded inputs r
+// followed by cache, where cache is Hash4(pk). The pk argument itself is not
+// read.
+// h = hashConfirm(r,pk,cache)
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, inputsBytes+hashBytes)
+	copy(x, r)
+	copy(x[inputsBytes:], cache)
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash.
+//
+// hashSession writes to k the hash, with prefix b, of the encoded inputs y
+// followed by the ciphertext (including its confirmation hash) z.
+// k = hashSession(b,y,z)
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, inputsBytes+ciphertextsBytes+confirmBytes)
+	copy(x[:inputsBytes], y)
+	copy(x[inputsBytes:], z)
+	hashPrefix(k, b, x, len(x))
+// NTRU LPRime KEM.
+//
+// kemKeyGen generates a key pair into pk and sk. If seed is nil, KeySeedSize
+// random bytes are drawn from crypto/rand; otherwise seed must be exactly
+// KeySeedSize bytes long. It panics on a wrong seed length or if the random
+// source fails.
+//
+// The private key is laid out as: encoded secret polynomial, public key,
+// the last inputsBytes bytes of the seed (used when decapsulation rejects a
+// ciphertext), and Hash4(pk).
+// pk,sk = kemKeyGen()
+func kemKeyGen(pk []byte, sk []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, KeySeedSize)
+		// A failed read must not be ignored: it would leave an all-zero
+		// seed and therefore a predictable key pair.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != KeySeedSize {
+		panic("seed must be of length KeySeedSize")
+	}
+	zKeyGen(pk, sk, seed[:seedBytes+p*4])
+	seed = seed[seedBytes+p*4:]
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	copy(sk[:inputsBytes], seed)
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide encodes r into r_enc, encrypts r under pk into the first
+// ciphertextsBytes bytes of c and appends the confirmation hash; cache is
+// Hash4(pk).
+// c,r_enc = hide(r,pk,cache)
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	inputsEncode(r_enc, r)
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo generates a shared key for pk, writing the ciphertext to c
+// and the shared key to k.
+//
+// If seed is nil, EncapsulationSeedSize random bytes are drawn from
+// crypto/rand. It panics if seed, c or k do not have length
+// EncapsulationSeedSize, CiphertextSize and SharedKeySize respectively, or if
+// the random source fails.
+// c,k = encap(pk)
+func (pk PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		// A failed read must not be ignored: it would leave an all-zero
+		// seed and therefore a predictable shared key.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	var r Inputs
+	hashPrefix(cache, 4, pk.pk[:], publicKeysBytes)
+	inputsRandom(&r, seed)
+	hide(c, r_enc, r, pk.pk[:], cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertextsDiffMask compares the first ciphertextsBytes+confirmBytes bytes
+// of c and c2 without branching on their contents. It returns 0 if they
+// match and -1 otherwise.
+func ciphertextsDiffMask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	for i := 0; i < ciphertextsBytes+confirmBytes; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is at most 0xFF, so differentbits-1 has bit 8 set only
+	// when differentbits is 0 (it wraps to 0xFFFF). Widen to int before
+	// subtracting so that a mismatch yields -1 rather than 0xFFFF.
+	return int(1&((differentbits-1)>>8)) - 1
+// DecapsulateTo computes the shared key for the ciphertext ct and writes it
+// to ss. It panics if ct is not CiphertextSize bytes or ss is not
+// SharedKeySize bytes long.
+//
+// The decrypted inputs are re-encrypted and compared with ct. On a mismatch
+// the session key is derived from the value rho stored in the private key
+// instead of the decrypted inputs, and with hash prefix 0 instead of 1.
+// k = decap(c,sk)
+func (priv *PrivateKey) DecapsulateTo(ss []byte, ct []byte) {
+	if len(ct) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(ss) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	// Split the private key into its segments (see kemKeyGen).
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, ct, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertextsDiffMask(ct, cnew)
+	// Replace r_enc by rho when mask is set, without branching on mask.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	// hashPrefix keeps only the low byte of 1+mask: 1 on a match, 0 otherwise.
+	hashSession(ss, 1+mask, r_enc, ct)
+// PrivateKey is a private key of this scheme. Its packed form consists of the
+// following segments: the encoded secret polynomial, the public key, entropy
+// used when a ciphertext is rejected, and the hash of the public key.
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey is a public key of this scheme in packed form: the generator seed
+// followed by the encoded rounded polynomial.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme implements kem.Scheme for this instance; sch is its only value.
+type scheme struct{}
+var sch kem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+// The methods below report the instance name and its fixed sizes in bytes.
+func (*scheme) Name() string               { return "ntrulpr857" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the scheme the key belongs to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a private key of this scheme with the same
+// packed bytes.
+// NOTE(review): bytes.Equal gives no constant-time guarantee; consider a
+// constant-time comparison for secret material.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	if oth, ok := other.(*PrivateKey); ok {
+		return bytes.Equal(sk.sk[:], oth.sk[:])
+	}
+	return false
+// Equal reports whether other is a public key of this scheme with the same
+// packed bytes.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	if oth, ok := other.(*PublicKey); ok {
+		return bytes.Equal(pk.pk[:], oth.pk[:])
+	}
+	return false
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary()
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], seed)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate generates a fresh shared key ss for pk and returns it together
+// with its encapsulation ct. It returns kem.ErrTypeMismatch if pk does not
+// belong to this scheme.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically is like Encapsulate but takes its randomness
+// from seed. It returns kem.ErrSeedSize if seed is not EncapsulationSeedSize
+// bytes long, and kem.ErrTypeMismatch if pk does not belong to this scheme.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	if len(seed) != EncapsulationSeedSize {
+		return nil, nil, kem.ErrSeedSize
+	}
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate recovers the shared key encapsulated in ct using sk. It returns
+// kem.ErrTypeMismatch if sk does not belong to this scheme and
+// kem.ErrCiphertextSize if ct is not CiphertextSize bytes long.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	switch {
+	case !ok:
+		return nil, kem.ErrTypeMismatch
+	case len(ct) != CiphertextSize:
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(ss, ct)
+	return ss, nil
+// UnmarshalBinaryPublicKey parses a packed public key. It returns
+// kem.ErrPubKeySize if buf is not PublicKeySize bytes long.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	key := new(PublicKey)
+	copy(key.pk[:], buf)
+	return key, nil
+// UnmarshalBinaryPrivateKey parses a packed private key. It returns
+// kem.ErrPrivKeySize if buf is not PrivateKeySize bytes long.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	key := new(PrivateKey)
+	copy(key.sk[:], buf)
+	return key, nil
diff --git a/kem/ntruprime/ntrulpr953/ntruprime.go b/kem/ntruprime/ntrulpr953/ntruprime.go
new file mode 100644
index 000000000..6f56710bd
--- /dev/null
+++ b/kem/ntruprime/ntrulpr953/ntruprime.go
@@ -0,0 +1,832 @@
+// Code generated from ntrulpr.templ.go. DO NOT EDIT.
+// Package ntrulpr953 implements the IND-CCA2 secure key encapsulation mechanism
+// ntrulpr953 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package ntrulpr953
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/ntrulpr953"
+// Instance parameters. Names assigned from ntrup.* come from the
+// pke/ntruprime/ntrulpr953 package; the others are derived in this file.
+const (
+	// p is the number of coefficients per polynomial; q is the modulus
+	// passed to the reduction helper in fqFreeze; q12 bounds the centred
+	// representation -q12...q12.
+	p            = ntrup.P
+	q            = ntrup.Q
+	q12          = ((q - 1) / 2)
+	// roundedBytes is the length of the output of roundedEncode.
+	roundedBytes = ntrup.RoundedBytes
+	// w is the number of list entries that shortFromList turns into +-1.
+	w    = ntrup.W
+	// tau0 and tau1 are used by top; tau2 and tau3 are used by right.
+	tau0 = ntrup.Tau0
+	tau1 = ntrup.Tau1
+	tau2 = ntrup.Tau2
+	tau3 = ntrup.Tau3
+	// I is the number of input bits (see Inputs).
+	I = ntrup.I
+	// hashBytes is the number of digest bytes kept by hashPrefix callers.
+	hashBytes = 32
+	// smallBytes is the packed size of p small coefficients, four per byte.
+	smallBytes = ((p + 3) / 4)
+	inputsBytes      = I / 8
+	seedBytes        = 32
+	ciphertextsBytes = roundedBytes + topBytes
+	secretKeysBytes  = smallBytes
+	publicKeysBytes  = seedBytes + roundedBytes
+	// confirmBytes is the length of the confirmation hash appended by hide.
+	confirmBytes = 32
+	// NOTE(review): tau is not referenced in the visible part of this file.
+	tau      = 16
+	// topBytes: topEncode packs two 4-bit values into each byte.
+	topBytes = I / 2
+const (
+	// KeySeedSize is the seed size accepted by DeriveKeyPair (and reported by
+	// SeedSize): seedBytes for the generator seed, 4*p bytes for the short
+	// secret polynomial and inputsBytes stored in the private key.
+	KeySeedSize = seedBytes + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = inputsBytes
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+type (
+	// small is a polynomial coefficient represented as -1, 0 or 1.
+	small int8
+	// Fq is a coefficient modulo q, represented in -q12...q12.
+	Fq    int16
+// Arithmetic over GF(3).
+// Elements are always represented as -1, 0 or 1, so converting them to
+// integers is a no-op.
+//
+// f3Freeze reduces x into that representation.
+// x must not be close to the top of the int16 range.
+func f3Freeze(x int16) small {
+	shifted := int32(x) + 1
+	residue := internal.Int32ModUint14(shifted, 3)
+	return small(residue) - 1
+// Arithmetic over GF(q).
+// Elements are always represented in -q12...q12, so converting them to
+// integers is a no-op.
+//
+// fqFreeze reduces x into that representation.
+// x must not be close to the top of the int32 range.
+func fqFreeze(x int32) Fq {
+	reduced := internal.Int32ModUint14(x+q12, q)
+	return Fq(reduced - q12)
+// top maps a coefficient C to (tau1*(C+tau0) + 2^14) >> 15, i.e. a scaled
+// value rounded by adding half of 2^15 before the shift.
+// NOTE(review): topEncode stores results in 4 bits, so the output is
+// presumably in 0...15 — confirm against the parameter set.
+func top(C Fq) int8 {
+	return int8((tau1*(int32)(C+tau0) + 16384) >> 15)
+// right maps a value T produced by top back to a coefficient, computed as
+// tau3*T - tau2 reduced by fqFreeze.
+func right(T int8) Fq {
+	return fqFreeze(tau3*int32(T) - tau2)
+// Polynomials mod q.
+//
+// rqMultSmall sets h = f*g in the ring Rq, where g has small coefficients.
+// h, f and g must each hold at least p coefficients.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	// Schoolbook product: coefficient k collects f[j]*g[k-j] over every j
+	// for which both indices lie in [0, p).
+	prod := make([]Fq, 2*p-1)
+	for k := range prod {
+		lo, hi := 0, k
+		if k >= p {
+			lo, hi = k-p+1, p-1
+		}
+		var acc Fq
+		for j := lo; j <= hi; j++ {
+			acc = fqFreeze(int32(acc) + int32(f[j])*int32(g[k-j]))
+		}
+		prod[k] = acc
+	}
+	// Fold the high coefficients down: coefficient k (k >= p) is added to
+	// positions k-p and k-p+1.
+	for k := 2*p - 2; k >= p; k-- {
+		carry := prod[k]
+		prod[k-p] = fqFreeze(int32(prod[k-p] + carry))
+		prod[k-p+1] = fqFreeze(int32(prod[k-p+1] + carry))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = prod[i]
+	}
+// Rounded polynomials mod q.
+//
+// round writes into out each coefficient of a minus its f3Freeze residue,
+// which makes every output coefficient a multiple of 3.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		c := a[i]
+		out[i] = c - Fq(f3Freeze(int16(c)))
+	}
+// minmax stores min(*x, *y) in *x and max(*x, *y) in *y. It uses only
+// arithmetic and bitwise operations, with no branch on the values.
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	// Correct the top bit of the difference so that it is set exactly when
+	// yi < xi as unsigned values.
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	// Turn the 0/1 flag into an all-zeros/all-ones mask.
+	c = -c
+	c &= xy
+	// XOR with xi^yi swaps the two values; XOR with 0 leaves them alone.
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order.
+// The sequence of compare-exchange steps depends only on n, never on the
+// values being sorted.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// Largest power of two strictly below n.
+	start := 1
+	for start < n-start {
+		start <<= 1
+	}
+	for stride := start; stride > 0; stride >>= 1 {
+		for i := 0; i < n-stride; i++ {
+			if i&stride != 0 {
+				continue
+			}
+			minmax(&x[i], &x[i+stride])
+		}
+		for span := start; span > stride; span >>= 1 {
+			for i := 0; i < n-span; i++ {
+				if i&stride != 0 {
+					continue
+				}
+				minmax(&x[i+stride], &x[i+span])
+			}
+		}
+	}
+// Sorting to generate short polynomial
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function.
+//
+// hashPrefix hashes the byte b followed by inlen bytes of in with SHA-512 and
+// copies the first 32 bytes of the digest into out.
+// e.g., b = 0 means out = Hash0(in).
+// If in is shorter than inlen the missing bytes are hashed as zeros.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	msg := make([]byte, inlen+1)
+	msg[0] = byte(b)
+	copy(msg[1:], in)
+	digest := sha512.Sum512(msg)
+	copy(out, digest[:32])
+// Higher level randomness.
+//
+// urandom32 assembles the first four bytes of seed into a uint32, least
+// significant byte first. seed must hold at least four bytes.
+func urandom32(seed []byte) uint32 {
+	var v uint32
+	for i := 0; i < 4; i++ {
+		v |= uint32(seed[i]) << uint(8*i)
+	}
+	return v
+// shortRandom fills out with a short polynomial derived from seed, which must
+// hold 4*p bytes (one little-endian uint32 per coefficient).
+//
+// A nil seed means "use fresh randomness": 4*p bytes are read from
+// crypto/rand. The previous nil branch called urandom32(nil), which indexes
+// the nil slice and panics.
+func shortRandom(out []small, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	L := make([]int32, p)
+	for i := range L {
+		L[i] = int32(urandom32(seed[i*4 : i*4+4]))
+	}
+	shortFromList(out, L)
+// NTRU LPRime Core
+// (G,A),a = keyGen(G); leaves G unchanged
+//
+// keyGen draws the short secret a from seed and writes A = round(a*G).
+func keyGen(A []Fq, a []small, G []Fq, seed []byte) {
+	aG := make([]Fq, p)
+	shortRandom(a, seed)
+	rqMultSmall(aG, G, a)
+	round(A, aG)
+// B,T = encrypt(r,(G,A),b)
+//
+// encrypt writes B = round(b*G) and, for each of the I input positions,
+// T[i] = top(b*A[i] + r[i]*q12).
+func encrypt(B []Fq, T []int8, r []int8, G []Fq, A []Fq, b []small) {
+	bG := make([]Fq, p)
+	bA := make([]Fq, p)
+	rqMultSmall(bG, G, b)
+	round(B, bG)
+	rqMultSmall(bA, A, b)
+	for i := 0; i < I; i++ {
+		T[i] = top(fqFreeze(int32(bA[i]) + int32(r[i])*q12))
+	}
+// r = decrypt((B,T),a)
+//
+// decrypt recovers the I input values from B, T and the secret a. Each r[i]
+// is the negated Int16NegativeMask of right(T[i]) - (a*B)[i] + 4*w + 1 after
+// reduction — presumably 1 when that value is negative and 0 otherwise
+// (TODO confirm against internal.Int16NegativeMask).
+func decrypt(r []int8, B []Fq, T []int8, a []small) {
+	aB := make([]Fq, p)
+	rqMultSmall(aB, B, a)
+	for i := 0; i < I; i++ {
+		r[i] = int8(-internal.Int16NegativeMask(int16(fqFreeze(int32(right(T[i])) - int32(aB[i]) + 4*w + 1))))
+	}
+// Encoding I-bit inputs
+
+// Inputs holds the I input values, one per element; inputsRandom fills each
+// element with 0 or 1.
+type Inputs [I]int8
+// inputsEncode packs r into s, eight values per byte, lowest bit first.
+// Bits are OR-ed into s, so s must be zeroed by the caller.
+func inputsEncode(s []byte, r Inputs) {
+	for i := 0; i < I; i++ {
+		s[i>>3] |= byte(r[i] << (i & 7))
+	}
+// Expand
+//
+// expand fills the first p entries of L with keystream words produced by AES
+// in CTR mode, keyed with the first 32 bytes of k and started from an all-zero
+// counter block. L is used as scratch space and must hold at least 4*p
+// entries.
+func expand(L []uint32, k []byte) {
+	// All-zero plaintext, one byte per entry of L, so the "ciphertext" is the
+	// raw keystream.
+	temp := make([]byte, len(L)) // plaintext to be encrypted. Should be of the same size as L (4*P)
+	ciphertext := make([]byte, aes.BlockSize+len(temp))
+	block, err := aes.NewCipher(k[:32])
+	if err != nil {
+		panic(err)
+	}
+	// The first aes.BlockSize bytes of ciphertext are still zero and serve as
+	// the initial counter block.
+	stream := cipher.NewCTR(block, ciphertext[:aes.BlockSize])
+	stream.XORKeyStream(ciphertext[aes.BlockSize:], temp)
+	ciphertext = ciphertext[aes.BlockSize:]
+	// convert byte to uint32
+	for i := 0; i < len(temp); i++ {
+		L[i] = uint32(ciphertext[i])
+	}
+	// Pack each group of four bytes into one little-endian word, in place.
+	// Entry i is written only after entries 4*i..4*i+3 have been read.
+	for i := 0; i < p; i++ {
+		var L0 uint32 = L[4*i]
+		var L1 uint32 = L[4*i+1]
+		var L2 uint32 = L[4*i+2]
+		var L3 uint32 = L[4*i+3]
+		L[i] = L0 + (L1 << 8) + (L2 << 16) + (L3 << 24)
+	}
+// generator, hashShort
+// G = generator(k)
+//
+// generator expands the key k into p words and reduces each one modulo q into
+// the centred range to obtain the coefficients of G.
+func generator(G []Fq, k []byte) {
+	L := make([]uint32, 4*p)
+	expand(L, k)
+	for i := 0; i < p; i++ {
+		G[i] = Fq(internal.Uint32ModUint14(L[i], q) - q12)
+	}
+// out = hashShort(r)
+//
+// hashShort derives a short polynomial from the inputs r: r is packed into
+// bytes, hashed with prefix 5, and the digest is expanded into the p words
+// handed to shortFromList.
+func hashShort(out []small, r Inputs) {
+	packed := make([]byte, inputsBytes)
+	inputsEncode(packed, r)
+	digest := make([]byte, hashBytes)
+	hashPrefix(digest, 5, packed, len(packed))
+	words := make([]uint32, 4*p)
+	expand(words, digest)
+	signed := make([]int32, p)
+	for i := range signed {
+		signed[i] = int32(words[i])
+	}
+	shortFromList(out, signed)
+// NTRU LPRime expand
+// (S,A),a = xKeyGen()
+//
+// xKeyGen copies the first seedBytes of seed into S, expands S into the
+// generator G and runs keyGen with the remainder of seed.
+func xKeyGen(S []byte, A []Fq, a []small, seed []byte) {
+	copy(S, seed[:seedBytes])
+	seed = seed[seedBytes:]
+	G := make([]Fq, p)
+	generator(G, S)
+	keyGen(A, a, G, seed)
+// B,T = xEncrypt(r,(S,A))
+//
+// xEncrypt rebuilds G from S, derives the short polynomial b from r with
+// hashShort and encrypts r under (G, A). r must not hold more than I values.
+func xEncrypt(B []Fq, T []int8, r []int8, S []byte, A []Fq) {
+	G := make([]Fq, p)
+	generator(G, S)
+	b := make([]small, p)
+	// convert []int8 to Inputs
+	var r_inputs Inputs
+	for i := 0; i < len(r); i++ {
+		r_inputs[i] = r[i]
+	}
+	hashShort(b, r_inputs)
+	encrypt(B, T, r, G, A, b)
+// Encoding small polynomials (including short polynomials)
+// these are the only functions that rely on p mod 4 = 1
+//
+// smallEncode packs the p coefficients of f into s. Each coefficient is
+// stored as f[i]+1 in two bits, four coefficients per byte starting at the
+// low bits; the last coefficient gets a byte of its own.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Remaining single coefficient.
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode is the inverse of smallEncode: it reads two bits per
+// coefficient from s, low bits first, and stores each value minus 1 in f.
+// The final coefficient is taken from the low two bits of the last byte.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding rounded polynomials
+//
+// roundedEncode serialises the rounded polynomial r into s. Each coefficient
+// is shifted by q12 and scaled by 10923/2^15 (approximately 1/3), then the
+// values are packed by internal.Encode with the common bound (q+2)/3.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode is the inverse of roundedEncode: it unpacks p values bounded
+// by (q+2)/3 from s and maps each value v to the coefficient 3*v - q12.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		// The subtraction is done in uint16 and may wrap; converting to Fq
+		// (int16) yields the intended signed value.
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Encoding top polynomials
+//
+// topEncode packs the values of T into s two per byte: the even-indexed value
+// in the low four bits and the odd-indexed value in the high four bits.
+func topEncode(s []byte, T []int8) {
+	for i := 0; i < topBytes; i++ {
+		s[i] = byte(T[2*i] + (T[2*i+1] << 4))
+	}
+// topDecode is the inverse of topEncode: it splits each byte of s into its
+// low and high four bits and stores them in consecutive entries of T.
+func topDecode(T []int8, s []byte) {
+	for i := 0; i < topBytes; i++ {
+		T[2*i] = int8(s[i] & 15)
+		T[2*i+1] = int8(s[i] >> 4)
+	}
+// NTRU LPRime Core plus encoding
+//
+// inputsRandom unpacks the bits of seed into r, one bit per element, lowest
+// bit of each byte first. seed must hold at least I/8 bytes.
+func inputsRandom(r *Inputs, seed []byte) {
+	for i := 0; i < I; i++ {
+		r[i] = int8(1 & (seed[i>>3] >> (i & 7)))
+	}
+// Generates public key and private key
+// pk,sk = zKeyGen()
+//
+// zKeyGen writes the generator seed followed by the encoded polynomial A into
+// pk, and the encoded short secret a into sk.
+func zKeyGen(pk []byte, sk []byte, seed []byte) {
+	A := make([]Fq, p)
+	a := make([]small, p)
+	// xKeyGen stores the generator seed in the first seedBytes of pk.
+	xKeyGen(pk, A, a, seed)
+	pk = pk[seedBytes:]
+	roundedEncode(pk, A)
+	smallEncode(sk, a)
+// c = zEncrypt(r,pk)
+//
+// zEncrypt decodes A from pk, encrypts r and writes the encoded B followed by
+// the encoded T into c.
+func zEncrypt(c []byte, r Inputs, pk []byte) {
+	A := make([]Fq, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	roundedDecode(A, pk[seedBytes:])
+	xEncrypt(B, T, r[:], pk[:seedBytes], A)
+	roundedEncode(c, B)
+	c = c[roundedBytes:]
+	topEncode(c, T)
+// r = zDecrypt(C,sk)
+//
+// zDecrypt decodes the secret a from sk and (B, T) from c, then recovers the
+// inputs r with decrypt.
+func zDecrypt(r *Inputs, c []byte, sk []byte) {
+	a := make([]small, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	smallDecode(a, sk)
+	roundedDecode(B, c)
+	topDecode(T, c[roundedBytes:])
+	decrypt(r[:], B, T, a)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+//
+// hashConfirm hashes the encoded inputs r followed by cache with prefix 2.
+// The pk argument is not read; only its hash in cache is used.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, inputsBytes+hashBytes)
+	copy(x, r)
+	copy(x[inputsBytes:], cache)
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+//
+// hashSession hashes the encoded inputs y followed by the ciphertext and
+// confirmation z, using b as the prefix byte, and writes the result to k.
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, inputsBytes+ciphertextsBytes+confirmBytes)
+	copy(x[:inputsBytes], y)
+	copy(x[inputsBytes:], z)
+	hashPrefix(k, b, x, len(x))
+// NTRU LPRime
+// pk,sk = kemKeyGen()
+//
+// kemKeyGen writes a key pair into pk and sk. A nil seed requests fresh
+// randomness from crypto/rand; otherwise seed must be KeySeedSize bytes long
+// and fully determines the key pair. It panics on a wrong seed length or if
+// the system random source fails.
+//
+// Layout of sk: encoded secret | public key | inputsBytes of seed material |
+// Hash4(pk).
+func kemKeyGen(pk []byte, sk []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, KeySeedSize)
+		// Never continue with a partially filled (predictable) seed.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != KeySeedSize {
+		panic("seed must be of length KeySeedSize")
+	}
+	zKeyGen(pk, sk, seed[:seedBytes+p*4])
+	seed = seed[seedBytes+p*4:]
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	copy(sk[:inputsBytes], seed)
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+//
+// hide encodes r into r_enc, writes the encryption of r under pk to the first
+// ciphertextsBytes of c and appends the confirmation hash after it.
+// r_enc must be zeroed by the caller because inputsEncode ORs bits into it.
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	inputsEncode(r_enc, r)
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// c,k = encap(pk)
+//
+// EncapsulateTo writes a ciphertext to c and the matching shared key to k.
+// A nil seed requests fresh randomness from crypto/rand; otherwise seed must
+// be EncapsulationSeedSize bytes long and fully determines the output.
+//
+// It panics if seed, c or k has the wrong length, or if the system random
+// source fails.
+func (pk PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		// Never continue with a partially filled (predictable) seed.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	var r Inputs
+	// cache = Hash4(pk), as expected by hide.
+	hashPrefix(cache, 4, pk.pk[:], publicKeysBytes)
+	inputsRandom(&r, seed)
+	hide(c, r_enc, r, pk.pk[:], cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertextsDiffMask compares the first ciphertextsBytes+confirmBytes bytes
+// of c and c2 without branching on their contents.
+// Returns 0 if they match, else -1 (all bits set).
+func ciphertextsDiffMask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	for i := 0; i < ciphertextsBytes+confirmBytes; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is at most 0xff, so differentbits-1 is negative exactly
+	// when every byte matched. The arithmetic is done on a signed int: in
+	// uint16 the final "- 1" wraps to 0xffff and the function would return
+	// 65535 rather than the documented -1.
+	return (((int(differentbits) - 1) >> 8) & 1) - 1
+// k = decap(c,sk)
+//
+// DecapsulateTo writes to ss the shared key for ciphertext ct under priv.
+// It panics if ct or ss has the wrong length. A ciphertext that fails the
+// re-encryption check does not produce an error: a key derived from the
+// secret value rho stored in the private key is written instead.
+func (priv *PrivateKey) DecapsulateTo(ss []byte, ct []byte) {
+	if len(ct) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(ss) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	// Private key layout: encoded secret | public key | rho | Hash4(pk).
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, ct, sk)
+	// Re-encrypt the recovered inputs and compare with the received ciphertext.
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertextsDiffMask(ct, cnew)
+	// mask is 0 on a match and non-zero otherwise; only its low byte matters
+	// below. On mismatch r_enc is replaced by rho without branching.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	// hashPrefix keeps only the low byte of 1+mask: 1 on a match, 0 otherwise.
+	hashSession(ss, 1+mask, r_enc, ct)
+// PrivateKey is a packed ntrulpr953 private key.
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey is a packed ntrulpr953 public key: the generator seed followed by
+// the encoded rounded polynomial (see zKeyGen).
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme is the stateless kem.Scheme implementation for ntrulpr953.
+type scheme struct{}
+// sch is the shared instance handed out by Scheme and by the keys.
+var sch kem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+// The accessors below report the scheme name and the fixed sizes, in bytes,
+// declared as package constants.
+func (*scheme) Name() string               { return "ntrulpr953" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the scheme a key belongs to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a *PrivateKey holding the same bytes as sk.
+// NOTE(review): bytes.Equal is not documented as constant-time; confirm
+// whether private-key comparison needs to be.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	if oth, ok := other.(*PrivateKey); ok {
+		return bytes.Equal(sk.sk[:], oth.sk[:])
+	}
+	return false
+// Equal reports whether other is a *PublicKey holding the same bytes as pk.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	if oth, ok := other.(*PublicKey); ok {
+		return bytes.Equal(pk.pk[:], oth.pk[:])
+	}
+	return false
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary()
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+// DeriveKeyPair deterministically derives a key pair from seed.
+//
+// It panics with kem.ErrSeedSize when len(seed) != KeySeedSize. The length is
+// checked here because kemKeyGen treats a nil seed as a request for fresh
+// randomness; letting a nil seed through would make this "deterministic"
+// derivation silently return a random key pair.
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	if len(seed) != KeySeedSize {
+		panic(kem.ErrSeedSize)
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], seed)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate produces a ciphertext ct and shared key ss for pk using fresh
+// randomness (a nil seed makes EncapsulateTo read from crypto/rand). It
+// returns kem.ErrTypeMismatch when pk is not a *PublicKey of this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	if pub, ok := pk.(*PublicKey); ok {
+		pub.EncapsulateTo(ct, ss, nil)
+		return ct, ss, nil
+	}
+	return nil, nil, kem.ErrTypeMismatch
+// EncapsulateDeterministically produces ct and ss for pk from the given seed.
+// The seed length is validated before the key type, so kem.ErrSeedSize takes
+// precedence over kem.ErrTypeMismatch.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	if len(seed) != EncapsulationSeedSize {
+		return nil, nil, kem.ErrSeedSize
+	}
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate recovers the shared key carried by ct using sk. It returns
+// kem.ErrTypeMismatch for a foreign key type and kem.ErrCiphertextSize when ct
+// does not have CiphertextSize bytes.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	switch {
+	case !ok:
+		return nil, kem.ErrTypeMismatch
+	case len(ct) != CiphertextSize:
+		return nil, kem.ErrCiphertextSize
+	}
+	shared := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(shared, ct)
+	return shared, nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize unless buf is exactly PublicKeySize bytes long.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	key := new(PublicKey)
+	copy(key.pk[:], buf)
+	return key, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize unless buf is exactly PrivateKeySize bytes long.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	key := new(PrivateKey)
+	copy(key.sk[:], buf)
+	return key, nil
diff --git a/kem/ntruprime/sntrup1013/ntruprime.go b/kem/ntruprime/sntrup1013/ntruprime.go
new file mode 100644
index 000000000..c09bb6a0a
--- /dev/null
+++ b/kem/ntruprime/sntrup1013/ntruprime.go
@@ -0,0 +1,971 @@
+// Code generated from sntrup.templ.go. DO NOT EDIT.
+// Package sntrup1013 implements the IND-CCA2 secure key encapsulation mechanism
+// sntrup1013 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package sntrup1013
+import (
+	"bytes"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	sntrupKem "github.com/cloudflare/circl/pke/ntruprime/kem"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/sntrup1013"
+type (
+	// small is a polynomial coefficient represented as -1, 0 or 1.
+	small  int8
+	// Fq is a coefficient modulo q, represented in -q12...q12.
+	Fq     int16
+	// Inputs is the polynomial that gets encrypted: p small coefficients.
+	Inputs [p]small
+// Instance parameters. Names assigned from ntrup.* come from the
+// pke/ntruprime/sntrup1013 package; the others are derived in this file.
+const (
+	// p is the number of coefficients per polynomial; q is the modulus
+	// passed to the reduction helper in fqFreeze; q12 bounds the centred
+	// representation -q12...q12.
+	p            = ntrup.P
+	q            = ntrup.Q
+	q12          = ((q - 1) / 2)
+	// roundedBytes and rqBytes are the output lengths of roundedEncode and
+	// rqEncode respectively.
+	roundedBytes = ntrup.RoundedBytes
+	rqBytes      = ntrup.RqBytes
+	// w is the weight checked by weightwMask and produced by shortFromList.
+	w            = ntrup.W
+	// hashBytes is the number of digest bytes kept by hashPrefix callers.
+	hashBytes = 32
+	// smallBytes is the packed size of p small coefficients, four per byte.
+	smallBytes = ((p + 3) / 4)
+	inputsBytes      = smallBytes
+	ciphertextsBytes = roundedBytes
+	// The secret key holds two encoded small polynomials (see zKeyGen).
+	secretKeysBytes  = (2 * smallBytes)
+	publicKeysBytes  = rqBytes
+	// confirmBytes is the length of the confirmation hash appended by hide.
+	confirmBytes = 32
+const (
+	// KeySeedSize is the nominal seed size for key generation.
+	// Note that during keyGen, a random small polynomial is generated
+	// repeatedly until one whose reciprocal exists is found, so the amount of
+	// randomness consumed depends on how often the reciprocal fails.
+	// This is why key pairs are derived deterministically from a generator
+	// (DeriveKeyPairFromGen) rather than from a fixed-size seed.
+	KeySeedSize = 4*p + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = 4 * p
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+// Arithmetic over GF(3).
+// Elements are always represented as -1, 0 or 1, so converting them to
+// integers is a no-op.
+//
+// f3Freeze reduces x into that representation.
+// x must not be close to the top of the int16 range.
+func f3Freeze(x int16) small {
+	shifted := int32(x) + 1
+	residue := internal.Int32ModUint14(shifted, 3)
+	return small(residue) - 1
+// Arithmetic over GF(q).
+// Elements are always represented in -q12...q12, so converting them to
+// integers is a no-op.
+//
+// fqFreeze reduces x into that representation.
+// x must not be close to the top of the int32 range.
+func fqFreeze(x int32) Fq {
+	reduced := internal.Int32ModUint14(x+q12, q)
+	return Fq(reduced - q12)
+// Calculates reciprocal of Fq
+func fqRecip(a1 Fq) Fq {
+	var i int = 1
+	ai := a1
+	for i < (q - 2) {
+		ai = fqFreeze(int32(a1) * int32(ai))
+		i += 1
+	}
+	return ai
+// Returns 0 if the weight w is equal to r
+// otherwise returns -1
+func weightwMask(r []small) int {
+	var weight int = 0
+	for i := 0; i < p; i++ {
+		weight += int(r[i]) & 1
+	}
+	// returns -1 if non zero
+	// otherwise returns 0 if weight==w
+	return internal.Int16NonzeroMask(int16(weight - w))
+// R3_fromR(R_fromRq(r))
+//
+// r3FromRq reduces every coefficient of r modulo 3 and stores the result in
+// out.
+func r3FromRq(out []small, r []Fq) {
+	for i := 0; i < p; i++ {
+		coeff := int16(r[i])
+		out[i] = f3Freeze(coeff)
+	}
+// h = f*g in the ring R3
+//
+// r3Mult computes the schoolbook product of f and g with coefficients reduced
+// by f3Freeze, folds the upper half back into the lower p coefficients and
+// writes those to h.
+func r3Mult(h []small, f []small, g []small) {
+	fg := make([]small, p+p-1)
+	var result small
+	var i, j int
+	// Product coefficients 0..p-1.
+	for i = 0; i < p; i++ {
+		result = 0
+		for j = 0; j <= i; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Product coefficients p..2p-2.
+	for i = p; i < p+p-1; i++ {
+		result = 0
+		for j = i - p + 1; j < p; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficient i (i >= p) is added to positions i-p and i-p+1.
+	for i = p + p - 2; i >= p; i-- {
+		fg[i-p] = f3Freeze(int16(fg[i-p] + fg[i]))
+		fg[i-p+1] = f3Freeze(int16(fg[i-p+1] + fg[i]))
+	}
+	for i = 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// Calculates the reciprocal of R3 polynomials
+// Returns 0 if recip succeeded; else -1
+//
+// r3Recip writes the candidate reciprocal of in to out. The main loop always
+// runs 2*p-1 iterations and selects between its two cases with masks rather
+// than branches. The return value is the non-zero mask of the final delta.
+func r3Recip(out []small, in []small) int {
+	// out := make([]small, p)
+	f := make([]small, p+1)
+	g := make([]small, p+1)
+	v := make([]small, p+1)
+	r := make([]small, p+1)
+	var sign int
+	r[0] = 1
+	// f has non-zero entries 1, -1, -1 at positions 0, p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g holds the input with its coefficients in reverse order.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = in[i]
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position.
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		sign = int(-g[0] * f[0])
+		// swap is a mask built from the sign of -delta and from g[0] being
+		// non-zero; presumably all ones when delta > 0 and g[0] != 0
+		// (TODO confirm against the internal mask helpers).
+		var swap int = int(internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0])))
+		// Negate delta when swapping, then increment it.
+		delta ^= swap & int(delta^-delta)
+		delta += 1
+		// Conditionally exchange (f, g) and (v, r) using XOR with the mask.
+		for i := 0; i < p+1; i++ {
+			t := swap & int(f[i]^g[i])
+			f[i] ^= small(t)
+			g[i] ^= small(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= small(t)
+			r[i] ^= small(t)
+		}
+		for i := 0; i < p+1; i++ {
+			g[i] = f3Freeze(int16(int(g[i]) + sign*int(f[i])))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = f3Freeze(int16(int(r[i]) + sign*int(v[i])))
+		}
+		// Shift g down by one position.
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	sign = int(f[0])
+	// The result is v in reverse order, scaled by f[0].
+	for i := 0; i < p; i++ {
+		out[i] = small(sign * int(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// Polynomials mod q.
+//
+// rqMultSmall sets h = f*g in the ring Rq, where g has small coefficients.
+// h, f and g must each hold at least p coefficients.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	// Schoolbook product: coefficient k collects f[j]*g[k-j] over every j
+	// for which both indices lie in [0, p).
+	prod := make([]Fq, 2*p-1)
+	for k := range prod {
+		lo, hi := 0, k
+		if k >= p {
+			lo, hi = k-p+1, p-1
+		}
+		var acc Fq
+		for j := lo; j <= hi; j++ {
+			acc = fqFreeze(int32(acc) + int32(f[j])*int32(g[k-j]))
+		}
+		prod[k] = acc
+	}
+	// Fold the high coefficients down: coefficient k (k >= p) is added to
+	// positions k-p and k-p+1.
+	for k := 2*p - 2; k >= p; k-- {
+		carry := prod[k]
+		prod[k-p] = fqFreeze(int32(prod[k-p] + carry))
+		prod[k-p+1] = fqFreeze(int32(prod[k-p+1] + carry))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = prod[i]
+	}
+// h = 3f in Rq
+//
+// rqMult3 multiplies every coefficient of f by 3 and reduces it.
+// The product 3*f[i] is formed in Fq (int16) before widening.
+// NOTE(review): this assumes 3*q12 fits in int16 — confirm for the parameter
+// set in use.
+func rqMult3(h []Fq, f []Fq) {
+	for i := 0; i < p; i++ {
+		h[i] = fqFreeze(int32(3 * f[i]))
+	}
+// Returns 0 if recip succeeded; else -1
+// out = 1/(3*in) in Rq
+//
+// rqRecip3 has the same structure as r3Recip but works with coefficients
+// modulo q. The main loop always runs 2*p-1 iterations and selects between
+// its two cases with masks rather than branches.
+func rqRecip3(out []Fq, in []small) int {
+	f := make([]Fq, p+1)
+	g := make([]Fq, p+1)
+	v := make([]Fq, p+1)
+	r := make([]Fq, p+1)
+	var swap, t int
+	var f0, g0 int32
+	// Starting r at 1/3 is what makes the result 1/(3*in) rather than 1/in.
+	r[0] = fqRecip(3)
+	// f has non-zero entries 1, -1, -1 at positions 0, p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g holds the input with its coefficients in reverse order.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = Fq(in[i])
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position.
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		// swap is a mask built from the sign of -delta and from g[0] being
+		// non-zero; presumably all ones when delta > 0 and g[0] != 0
+		// (TODO confirm against the internal mask helpers).
+		swap = internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0]))
+		// Negate delta when swapping, then increment it.
+		delta ^= swap & (delta ^ -delta)
+		delta += 1
+		// Conditionally exchange (f, g) and (v, r) using XOR with the mask.
+		for i := 0; i < p+1; i++ {
+			t = swap & int(f[i]^g[i])
+			f[i] ^= Fq(t)
+			g[i] ^= Fq(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= Fq(t)
+			r[i] ^= Fq(t)
+		}
+		f0 = int32(f[0])
+		g0 = int32(g[0])
+		// Cross-multiply so that the constant term of g cancels.
+		for i := 0; i < p+1; i++ {
+			g[i] = fqFreeze(f0*int32(g[i]) - g0*int32(f[i]))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = fqFreeze(f0*int32(r[i]) - g0*int32(v[i]))
+		}
+		// Shift g down by one position.
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	scale := Fq(fqRecip(f[0]))
+	// The result is v in reverse order, scaled by 1/f[0].
+	for i := 0; i < p; i++ {
+		out[i] = fqFreeze(int32(scale) * (int32)(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// Rounded polynomials mod q.
+//
+// round writes into out each coefficient of a minus its f3Freeze residue,
+// which makes every output coefficient a multiple of 3.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		c := a[i]
+		out[i] = c - Fq(f3Freeze(int16(c)))
+	}
+// minmax stores min(*x, *y) in *x and max(*x, *y) in *y. It uses only
+// arithmetic and bitwise operations, with no branch on the values.
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	// Correct the top bit of the difference so that it is set exactly when
+	// yi < xi as unsigned values.
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	// Turn the 0/1 flag into an all-zeros/all-ones mask.
+	c = -c
+	c &= xy
+	// XOR with xi^yi swaps the two values; XOR with 0 leaves them alone.
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order.
+// The sequence of compare-exchange steps depends only on n, never on the
+// values being sorted.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// Largest power of two strictly below n.
+	start := 1
+	for start < n-start {
+		start <<= 1
+	}
+	for stride := start; stride > 0; stride >>= 1 {
+		for i := 0; i < n-stride; i++ {
+			if i&stride != 0 {
+				continue
+			}
+			minmax(&x[i], &x[i+stride])
+		}
+		for span := start; span > stride; span >>= 1 {
+			for i := 0; i < n-span; i++ {
+				if i&stride != 0 {
+					continue
+				}
+				minmax(&x[i+stride], &x[i+span])
+			}
+		}
+	}
+// Sorting to generate short polynomial
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function.
+//
+// hashPrefix hashes the byte b followed by inlen bytes of in with SHA-512 and
+// copies the first 32 bytes of the digest into out.
+// e.g., b = 0 means out = Hash0(in).
+// If in is shorter than inlen the missing bytes are hashed as zeros.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	x := make([]byte, inlen+1)
+	x[0] = byte(b)
+	copy(x[1:], in)
+	// Sum512 returns the digest by value, so no scratch slice has to be
+	// allocated only to be overwritten.
+	h := sha512.Sum512(x)
+	copy(out, h[:32])
+// Higher level randomness.
+//
+// urandom32 assembles the first four bytes of seed into a uint32, least
+// significant byte first. seed must hold at least four bytes.
+func urandom32(seed []byte) uint32 {
+	var v uint32
+	for i := 0; i < 4; i++ {
+		v |= uint32(seed[i]) << uint(8*i)
+	}
+	return v
+// shortRandom fills out with a short polynomial derived from seed, which must
+// hold 4*p bytes (one little-endian uint32 per coefficient).
+func shortRandom(out []small, seed []byte) {
+	words := make([]int32, p)
+	for i := range words {
+		words[i] = int32(urandom32(seed[4*i : 4*i+4]))
+	}
+	shortFromList(out, words)
+// smallRandom fills out with p values in {-1, 0, 1} derived from seed, which
+// must hold 4*p bytes. Each value takes the low 30 bits of one 32-bit word,
+// multiplies them by 3 and keeps the bits above position 30 (0, 1 or 2)
+// before subtracting 1.
+func smallRandom(out []small, seed []byte) {
+	for i := 0; i < p; i++ {
+		out[i] = small(((urandom32(seed[4*i:4*i+4])&0x3fffffff)*3)>>30) - 1
+	}
+// Streamlined NTRU Prime Core
+// h,(f,ginv) = keyGen()
+//
+// keyGen samples a small polynomial g until it is invertible in R3 (storing
+// the inverse in ginv), samples a short polynomial f, and writes
+// h = g/(3f) in Rq.
+//
+// Randomness comes from gen when it is non-nil, otherwise from crypto/rand.
+// gen is always read in 4-byte chunks so that the byte stream consumed from
+// a deterministic generator is unchanged. It panics if the system random
+// source fails.
+func keyGen(h []Fq, f []small, ginv []small, gen *nist.DRBG) {
+	g := make([]small, p)
+	seed := make([]byte, 4*p+4*p)
+	gSeed, fSeed := seed[:4*p], seed[4*p:]
+	// fill overwrites buf (a multiple of 4 bytes long) with random bytes.
+	fill := func(buf []byte) {
+		if gen == nil {
+			// Never continue with a partially filled (predictable) buffer.
+			if _, err := cryptoRand.Read(buf); err != nil {
+				panic(err)
+			}
+			return
+		}
+		for i := 0; i < len(buf); i += 4 {
+			gen.Fill(buf[i : i+4])
+		}
+	}
+	for {
+		fill(gSeed)
+		smallRandom(g, gSeed)
+		if r3Recip(ginv, g) == 0 {
+			break
+		}
+	}
+	fill(fSeed)
+	shortRandom(f, fSeed)
+	finv := make([]Fq, p)
+	rqRecip3(finv, f) /* always works */
+	rqMultSmall(h, finv, g)
+// c = encrypt(r,h)
+//
+// encrypt writes c = round(h*r).
+func encrypt(c []Fq, r []small, h []Fq) {
+	hr := make([]Fq, p)
+	rqMultSmall(hr, h, r)
+	round(c, hr)
+// r = decrypt(c,(f,ginv))
+//
+// decrypt computes ev = (3*f*c mod 3) * ginv in R3 and writes it to r when it
+// has weight w. Otherwise r is set to a fixed pattern: 1 in the first w
+// positions and 0 elsewhere. The choice is made with a mask, not a branch.
+func decrypt(r []small, c []Fq, f []small, ginv []small) {
+	cf := make([]Fq, p)
+	cf3 := make([]Fq, p)
+	e := make([]small, p)
+	ev := make([]small, p)
+	rqMultSmall(cf, c, f)
+	rqMult3(cf3, cf)
+	r3FromRq(e, cf3)
+	r3Mult(ev, e, ginv)
+	mask := weightwMask(ev) /* 0 if weight w, else -1 */
+	// With mask == 0 both expressions reduce to ev[i]; with mask == -1 the
+	// first yields 1 and the second yields 0.
+	for i := 0; i < w; i++ {
+		r[i] = ((ev[i] ^ 1) & small(^mask)) ^ 1
+	}
+	for i := w; i < p; i++ {
+		r[i] = ev[i] & small(^mask)
+	}
+// Encoding small polynomials (including short polynomials)
+// these are the only functions that rely on p mod 4 = 1
+//
+// smallEncode packs the p coefficients of f into s. Each coefficient is
+// stored as f[i]+1 in two bits, four coefficients per byte starting at the
+// low bits; the last coefficient gets a byte of its own.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Remaining single coefficient.
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode is the inverse of smallEncode: it reads two bits per
+// coefficient from s, low bits first, and stores each value minus 1 in f.
+// The final coefficient is taken from the low two bits of the last byte.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding general polynomials
+//
+// rqEncode serialises the polynomial r into s. Each coefficient is shifted by
+// q12 to make it non-negative, then the values are packed by internal.Encode
+// with the common bound q.
+func rqEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16(r[i] + q12)
+		M[i] = q
+	}
+	internal.Encode(s, R, M, p)
+// rqDecode is the inverse of rqEncode: it unpacks p values bounded by q from
+// s and subtracts q12 from each to recover the centred coefficient.
+func rqDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = q
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = ((Fq)(R[i])) - q12
+	}
+// Encoding rounded polynomials
+//
+// roundedEncode serialises the rounded polynomial r into s. Each coefficient
+// is shifted by q12 and scaled by 10923/2^15 (approximately 1/3), then the
+// values are packed by internal.Encode with the common bound (q+2)/3.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode is the inverse of roundedEncode: it unpacks p values bounded
+// by (q+2)/3 from s and maps each value v to the coefficient 3*v - q12.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		// The subtraction is done in uint16 and may wrap; converting to Fq
+		// (int16) yields the intended signed value.
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Streamlined NTRU Prime Core plus encoding
+// Generates public key and private key
+// pk,sk = zKeyGen()
+//
+// zKeyGen writes the encoded polynomial h into pk and the two encoded small
+// polynomials f and v (the ginv output of keyGen) back to back into sk.
+func zKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	h := make([]Fq, p)
+	f := make([]small, p)
+	v := make([]small, p)
+	keyGen(h, f, v, gen)
+	rqEncode(pk, h)
+	smallEncode(sk, f)
+	sk = sk[smallBytes:]
+	smallEncode(sk, v)
+// C = zEncrypt(r,pk)
+//
+// zEncrypt decodes h from pk, encrypts r and writes the encoded rounded
+// ciphertext polynomial into C.
+func zEncrypt(C []byte, r Inputs, pk []byte) {
+	h := make([]Fq, p)
+	c := make([]Fq, p)
+	rqDecode(h, pk)
+	encrypt(c, r[:], h)
+	roundedEncode(C, c)
+// r = zDecrypt(C,sk)
+//
+// zDecrypt decodes the two small polynomials stored in sk and the ciphertext
+// polynomial from C, then recovers r with decrypt.
+func zDecrypt(r *Inputs, C []byte, sk []byte) {
+	f := make([]small, p)
+	v := make([]small, p)
+	c := make([]Fq, p)
+	smallDecode(f, sk)
+	sk = sk[smallBytes:]
+	smallDecode(v, sk)
+	roundedDecode(c, C)
+	decrypt(r[:], c, f, v)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+//
+// hashConfirm computes h = Hash2(Hash3(r) || cache), where r is the encoded
+// input of inputsBytes bytes. The pk argument is not read; only its hash in
+// cache is used.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, hashBytes*2)
+	hashPrefix(x, 3, r, inputsBytes)
+	copy(x[hashBytes:], cache[:hashBytes])
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+//
+// hashSession computes k = Hash_b(Hash3(y) || z), where y is the encoded
+// input and z is the ciphertext followed by the confirmation hash.
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, hashBytes+ciphertextsBytes+confirmBytes)
+	hashPrefix(x, 3, y, inputsBytes)
+	copy(x[hashBytes:], z[:ciphertextsBytes+confirmBytes])
+	hashPrefix(k, b, x, len(x))
+// Streamlined NTRU Prime
+//
+// kemKeyGen generates a KEM key pair. pk receives the encoded public key and
+// sk is filled with four consecutive segments:
+//
+//	secretKeysBytes  the encoded secret polynomials written by zKeyGen
+//	publicKeysBytes  a copy of pk
+//	inputsBytes      random bytes (rho), read back by DecapsulateTo
+//	hashBytes        hashPrefix(4, pk), the cached hash of the public key
+//
+// When gen is non-nil all randomness is drawn from it; otherwise crypto/rand
+// is used. The function panics if crypto/rand reports an error, because
+// continuing would silently leave rho as all zeros.
+// pk,sk = kemKeyGen()
+func kemKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	zKeyGen(pk, sk, gen)
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	if gen != nil {
+		gen.Fill(sk[:inputsBytes])
+	} else if _, err := cryptoRand.Read(sk[:inputsBytes]); err != nil {
+		panic(err)
+	}
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide encodes the input r into r_enc, encrypts r under pk into the first
+// ciphertextsBytes bytes of c, and writes the confirmation hash
+// hashConfirm(r_enc, pk, cache) right after the ciphertext.
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	smallEncode(r_enc, r[:])
+	zEncrypt(c, r, pk)
+	// Skip the ciphertext; the confirmation hash goes in the remaining bytes.
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo generates a shared key and its encapsulation for pub.
+//
+// Takes as input a public key
+// Returns ciphertext and shared key
+//
+// The ciphertext is written to c (CiphertextSize bytes) and the shared key
+// to k (SharedKeySize bytes). If seed is nil, 4*p fresh bytes are read from
+// crypto/rand; otherwise seed must be exactly 4*p (EncapsulationSeedSize)
+// bytes long and is the only source of randomness.
+//
+// It panics if seed, c or k has the wrong length, or if crypto/rand reports
+// an error (a failed read would otherwise leave an all-zero seed).
+// NOTE(review): the value receiver copies the whole public key on every
+// call, unlike the pointer receivers used by the other methods.
+// c,k = encap(pk)
+func (pub PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != 4*p {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	pk := pub.pk[:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	// cache = Hash4(pk), the value stored in private keys by kemKeyGen.
+	hashPrefix(cache, 4, pk, publicKeysBytes)
+	shortRandom(r[:], seed)
+	hide(c, r_enc, r, pk, cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertexts_diff_mask compares the first ciphertextsBytes+confirmBytes
+// bytes of c and c2 without branching on their contents.
+// Returns 0 if matching ciphertext+confirm, else -1
+func ciphertexts_diff_mask(c []byte, c2 []byte) int {
+	var differentbits uint16 = 0
+	n := ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16((c[i]) ^ (c2[i]))
+	}
+	// differentbits is in [0, 255]. Subtracting 1 wraps to 0xffff only when it
+	// is zero, so bit 8 of the difference is set exactly on a match. The
+	// result is widened through int16 so that a mismatch yields -1; converting
+	// the uint16 directly to int would give 65535 instead.
+	return int(int16((1 & ((differentbits - 1) >> 8)) - 1))
+// DecapsulateTo recovers the shared key for the ciphertext c and writes it
+// to k.
+//
+// Returns shared key from ciphertext and private key
+//
+// The input is decrypted and then re-encrypted with hide; the result is
+// compared with c by ciphertexts_diff_mask. On a mismatch the encoded input
+// is replaced by rho (the random segment stored in the private key) before
+// hashing, and the prefix passed to hashSession is 1+mask instead of 1.
+// It panics if c or k has the wrong length.
+// k = decap(c,sk)
+func (priv *PrivateKey) DecapsulateTo(k []byte, c []byte) {
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	// Split the private key into its segments (see kemKeyGen).
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, c, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertexts_diff_mask(c, cnew)
+	// Masked select: r_enc stays as is when mask is 0 and becomes rho when
+	// the low byte of mask is all ones.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	hashSession(k, 1+mask, r_enc, c)
+// PrivateKey is a packed private key.
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey is a packed public key.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme implements the KEM scheme methods for this parameter set; it
+// carries no state.
+type scheme struct{}
+// sch is the single scheme value handed out by Scheme and SntrupScheme.
+var sch sntrupKem.Scheme = &scheme{}
+// Scheme returns this parameter set as a generic kem.Scheme.
+func Scheme() kem.Scheme { return sch }
+// SntrupScheme returns the same scheme value typed as sntrupKem.Scheme.
+func SntrupScheme() sntrupKem.Scheme { return sch }
+// The methods below report the scheme name and the fixed sizes, in bytes,
+// of the objects handled by this parameter set. SeedSize reports
+// KeySeedSize, but note that DeriveKeyPair in this file ignores its seed.
+func (*scheme) Name() string               { return "sntrup1013" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the KEM scheme these keys belong to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+// MarshalBinary returns a fresh copy of the packed private key. The returned
+// error is always nil.
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a *PrivateKey of this package that holds
+// the same bytes as sk.
+// NOTE(review): bytes.Equal is not documented as constant-time; consider
+// crypto/subtle.ConstantTimeCompare for secret key material.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	oth, ok := other.(*PrivateKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(sk.sk[:], oth.sk[:])
+// Equal reports whether other is a *PublicKey of this package that holds
+// the same bytes as pk.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	oth, ok := other.(*PublicKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(pk.pk[:], oth.pk[:])
+// Public returns the public key embedded in the private key, i.e. the
+// publicKeysBytes bytes that follow the first secretKeysBytes bytes of sk.
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	// Copy straight from the key instead of going through MarshalBinary, which
+	// would create a second full copy of the secret key just to read pk.
+	copy(pk[:], sk.sk[secretKeysBytes:secretKeysBytes+publicKeysBytes])
+	return &PublicKey{pk: pk}
+// MarshalBinary returns a fresh copy of the packed public key. The returned
+// error is always nil.
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+// GenerateKeyPair generates a fresh key pair by calling kemKeyGen with a nil
+// generator. The returned error is always nil.
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+// DeriveKeyPair is not implemented for this scheme: it ignores seed and
+// returns nil keys. DeriveKeyPairFromGen is the variant that derives keys
+// from a caller-supplied generator.
+// Not used
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	return nil, nil
+// DeriveKeyPairFromGen generates a key pair by calling kemKeyGen with gen as
+// the generator. It panics if gen is nil.
+func (*scheme) DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey) {
+	if gen == nil {
+		panic("A nist DRBG must be provided")
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], gen)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate generates a shared key ss and its ciphertext ct for pk using
+// fresh randomness (EncapsulateTo with a nil seed). It returns
+// kem.ErrTypeMismatch if pk is not a *PublicKey of this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically is like Encapsulate but passes seed on to
+// EncapsulateTo. It returns kem.ErrTypeMismatch if pk is not a *PublicKey
+// of this package. EncapsulateTo panics if a non-nil seed does not have
+// length EncapsulationSeedSize, and falls back to crypto/rand when seed is
+// nil.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate returns the shared key for ciphertext ct under sk. It returns
+// kem.ErrTypeMismatch if sk is not a *PrivateKey of this package and
+// kem.ErrCiphertextSize if ct is not CiphertextSize bytes long.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	ssk, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := [SharedKeySize]byte{}
+	ssk.DecapsulateTo(ss[:], ct)
+	return ss[:], nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize if buf is not PublicKeySize bytes long; the contents of
+// buf are not validated any further here.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pk := [PublicKeySize]byte{}
+	copy(pk[:], buf)
+	return &PublicKey{pk: pk}, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize if buf is not PrivateKeySize bytes long; the contents
+// of buf are not validated any further here.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	sk := [PrivateKeySize]byte{}
+	copy(sk[:], buf)
+	return &PrivateKey{sk: sk}, nil
diff --git a/kem/ntruprime/sntrup1277/ntruprime.go b/kem/ntruprime/sntrup1277/ntruprime.go
new file mode 100644
index 000000000..a1318569d
--- /dev/null
+++ b/kem/ntruprime/sntrup1277/ntruprime.go
@@ -0,0 +1,971 @@
+// Code generated from sntrup.templ.go. DO NOT EDIT.
+// Package sntrup1277 implements the IND-CCA2 secure key encapsulation mechanism
+// sntrup1277 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package sntrup1277
+import (
+	"bytes"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	sntrupKem "github.com/cloudflare/circl/pke/ntruprime/kem"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/sntrup1277"
+// small is a coefficient of a small polynomial (kept in {-1, 0, 1} by this
+// code), Fq is a coefficient modulo q (kept in [-q12, q12]), and Inputs is a
+// polynomial of p small coefficients, the value that gets encrypted.
+type (
+	small  int8
+	Fq     int16
+	Inputs [p]small
+// Parameters of the scheme and sizes, in bytes, of the encoded objects.
+// p, q, w, roundedBytes and rqBytes come from the matching pke/ntruprime
+// package; q12 is (q-1)/2. smallBytes is the encoded size of a small
+// polynomial (four coefficients per byte, rounded up). hashBytes and
+// confirmBytes are the lengths of the truncated hashes used for the cached
+// public-key hash and for the confirmation appended to the ciphertext.
+const (
+	p            = ntrup.P
+	q            = ntrup.Q
+	q12          = ((q - 1) / 2)
+	roundedBytes = ntrup.RoundedBytes
+	rqBytes      = ntrup.RqBytes
+	w            = ntrup.W
+	hashBytes = 32
+	smallBytes = ((p + 3) / 4)
+	inputsBytes      = smallBytes
+	ciphertextsBytes = roundedBytes
+	secretKeysBytes  = (2 * smallBytes)
+	publicKeysBytes  = rqBytes
+	confirmBytes = 32
+const (
+	// KeySeedSize is the nominal size, in bytes, of a key-generation seed.
+	// During keyGen a random small polynomial is drawn repeatedly until one
+	// whose reciprocal exists is found, so the amount of randomness actually
+	// consumed depends on the number of times the reciprocal fails.
+	// This is why DeriveKeyPairFromGen, rather than a fixed-size seed, is
+	// used to derive a key pair deterministically.
+	KeySeedSize = 4*p + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size, in bytes, of the seed for EncapsulateTo.
+	EncapsulationSeedSize = 4 * p
+	// SharedKeySize is the size, in bytes, of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size, in bytes, of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size, in bytes, of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size, in bytes, of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+// Arithmetic operations over GF(3)
+//
+// Elements of F3 are always represented as -1, 0 or 1 (so ZZ_fromF3 is a
+// no-op), and a small polynomial has all of its coefficients in that set.
+//
+// f3Freeze reduces x to that representation: it adds 1, reduces modulo 3
+// with internal.Int32ModUint14 and subtracts 1 again.
+// x must not be close to the top of the int16 range.
+// NOTE(review): relies on Int32ModUint14 returning a value in [0, 3) —
+// confirm against the internal package.
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic operations over GF(q)
+//
+// Elements of Fq are always represented as -q12...q12, so ZZ_fromFq is a
+// no-op.
+//
+// fqFreeze reduces x to that representation: it adds q12, reduces modulo q
+// with internal.Int32ModUint14 and subtracts q12 again.
+// x must not be close to the top of the int32 range.
+// NOTE(review): relies on Int32ModUint14 returning a value in [0, q) —
+// confirm against the internal package.
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// fqRecip calculates the reciprocal of a1 in Fq.
+// It multiplies a1 by itself q-3 times, i.e. it returns a1^(q-2) reduced
+// with fqFreeze; for a prime q and nonzero a1 this is the inverse by
+// Fermat's little theorem. The loop count depends only on q, not on a1.
+func fqRecip(a1 Fq) Fq {
+	var i int = 1
+	ai := a1
+	for i < (q - 2) {
+		ai = fqFreeze(int32(a1) * int32(ai))
+		i += 1
+	}
+	return ai
+// weightwMask returns 0 if the weight of r is equal to w,
+// otherwise returns -1.
+// The weight is computed as the number of the p entries of r whose lowest
+// bit is set, which for entries in {-1, 0, 1} is the number of nonzero ones.
+func weightwMask(r []small) int {
+	var weight int = 0
+	for i := 0; i < p; i++ {
+		weight += int(r[i]) & 1
+	}
+	// returns -1 if non zero
+	// otherwise returns 0 if weight==w
+	return internal.Int16NonzeroMask(int16(weight - w))
+// r3FromRq reduces each of the p coefficients of r to {-1, 0, 1} with
+// f3Freeze and stores the result in out.
+// R3_fromR(R_fromRq(r))
+func r3FromRq(out []small, r []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = small(f3Freeze(int16(r[i])))
+	}
+// r3Mult computes h = f*g in the ring R3.
+// The product is formed coefficient by coefficient (schoolbook
+// multiplication) into 2p-1 coefficients, each reduced with f3Freeze, and
+// then folded back to p coefficients using x^p = x + 1.
+func r3Mult(h []small, f []small, g []small) {
+	fg := make([]small, p+p-1)
+	var result small
+	var i, j int
+	// Coefficients 0..p-1 of the product.
+	for i = 0; i < p; i++ {
+		result = 0
+		for j = 0; j <= i; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients p..2p-2 of the product.
+	for i = p; i < p+p-1; i++ {
+		result = 0
+		for j = i - p + 1; j < p; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Reduction: the coefficient of x^i (i >= p) is added to those of
+	// x^(i-p) and x^(i-p+1), from the highest degree down.
+	for i = p + p - 2; i >= p; i-- {
+		fg[i-p] = f3Freeze(int16(fg[i-p] + fg[i]))
+		fg[i-p+1] = f3Freeze(int16(fg[i-p+1] + fg[i]))
+	}
+	for i = 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// r3Recip calculates the reciprocal of the R3 polynomial in and stores it
+// in out.
+// Returns 0 if recip succeeded; else -1
+//
+// The loop always runs 2p-1 iterations and uses masked XOR swaps instead of
+// branches on the coefficient values. Success is reported when delta ends
+// at zero.
+func r3Recip(out []small, in []small) int {
+	// out is supplied by the caller and must hold p coefficients.
+	f := make([]small, p+1)
+	g := make([]small, p+1)
+	v := make([]small, p+1)
+	r := make([]small, p+1)
+	var sign int
+	r[0] = 1
+	// f holds x^p - x - 1 and g holds in, both with coefficients stored in
+	// reversed order (highest degree first).
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	for i := 0; i < p; i++ {
+		g[p-1-i] = in[i]
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position.
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		sign = int(-g[0] * f[0])
+		// swap combines two masks: one on the sign of -delta and one on
+		// g[0] being nonzero (presumably all ones when both hold).
+		var swap int = int(internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0])))
+		// Negate delta when swap is set, then increment it.
+		delta ^= swap & int(delta^-delta)
+		delta += 1
+		// Masked exchange of (f, g) and (v, r).
+		for i := 0; i < p+1; i++ {
+			t := swap & int(f[i]^g[i])
+			f[i] ^= small(t)
+			g[i] ^= small(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= small(t)
+			r[i] ^= small(t)
+		}
+		for i := 0; i < p+1; i++ {
+			g[i] = f3Freeze(int16(int(g[i]) + sign*int(f[i])))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = f3Freeze(int16(int(r[i]) + sign*int(v[i])))
+		}
+		// Shift g down by one position.
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	sign = int(f[0])
+	// Undo the coefficient reversal and scale by f[0].
+	for i := 0; i < p; i++ {
+		out[i] = small(sign * int(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// Polynomials mod q
+//
+// rqMultSmall computes h = f*g in the ring Rq, where g is a small
+// polynomial. The product is formed by schoolbook multiplication into 2p-1
+// coefficients, each reduced with fqFreeze, and then folded back to p
+// coefficients using x^p = x + 1.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	// Coefficients 0..p-1 of the product.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients p..2p-2 of the product.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Reduction: the coefficient of x^i (i >= p) is added to those of
+	// x^(i-p) and x^(i-p+1), from the highest degree down.
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// rqMult3 computes h = 3f in Rq.
+// The multiplication by 3 is carried out in the Fq (int16) type before the
+// result is reduced with fqFreeze.
+func rqMult3(h []Fq, f []Fq) {
+	for i := 0; i < p; i++ {
+		h[i] = fqFreeze(int32(3 * f[i]))
+	}
+// rqRecip3 computes out = 1/(3*in) in Rq.
+// Returns 0 if recip succeeded; else -1
+//
+// The structure mirrors r3Recip: a fixed 2p-1 iterations with masked XOR
+// swaps, coefficients reduced with fqFreeze, and success reported when
+// delta ends at zero. r starts at 1/3 so that the factor 3 is included.
+func rqRecip3(out []Fq, in []small) int {
+	f := make([]Fq, p+1)
+	g := make([]Fq, p+1)
+	v := make([]Fq, p+1)
+	r := make([]Fq, p+1)
+	var swap, t int
+	var f0, g0 int32
+	r[0] = fqRecip(3)
+	// f holds x^p - x - 1 and g holds in, both with coefficients stored in
+	// reversed order (highest degree first).
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	for i := 0; i < p; i++ {
+		g[p-1-i] = Fq(in[i])
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position.
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		// swap combines two masks: one on the sign of -delta and one on
+		// g[0] being nonzero (presumably all ones when both hold).
+		swap = internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0]))
+		// Negate delta when swap is set, then increment it.
+		delta ^= swap & (delta ^ -delta)
+		delta += 1
+		// Masked exchange of (f, g) and (v, r).
+		for i := 0; i < p+1; i++ {
+			t = swap & int(f[i]^g[i])
+			f[i] ^= Fq(t)
+			g[i] ^= Fq(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= Fq(t)
+			r[i] ^= Fq(t)
+		}
+		f0 = int32(f[0])
+		g0 = int32(g[0])
+		for i := 0; i < p+1; i++ {
+			g[i] = fqFreeze(f0*int32(g[i]) - g0*int32(f[i]))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = fqFreeze(f0*int32(r[i]) - g0*int32(v[i]))
+		}
+		// Shift g down by one position.
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// Undo the coefficient reversal and divide by f[0].
+	scale := Fq(fqRecip(f[0]))
+	for i := 0; i < p; i++ {
+		out[i] = fqFreeze(int32(scale) * (int32)(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// Rounded polynomials mod q
+//
+// round rounds all coefficients of a to the nearest multiple of 3 and
+// stores them in out: each coefficient has its f3Freeze residue (-1, 0 or 1)
+// subtracted.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i])))
+	}
+// minmax replaces (*x, *y) by (min(*x, *y), max(*x, *y)) and executes in
+// constant time: the exchange is done with a mask and XORs rather than a
+// branch on the values.
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	// Turn the top bit of c into an all-zeros or all-ones mask.
+	c >>= 31
+	c = -c
+	// c is now either 0 (keep) or xi^yi (exchange the two values).
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x.
+// It applies minmax to pairs of positions chosen from n alone, so the
+// sequence of compared positions does not depend on the values being sorted.
+// Inside this function p and q are local loop variables that shadow the
+// package-level constants of the same names.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// top is the smallest power of two with top >= n-top.
+	top := 1
+	for top < n-top {
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 {
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 {
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// shortFromList generates a short polynomial by sorting.
+// The low two bits of each of the p input words are overwritten with a tag:
+// the first w words get an even tag (lowest bit cleared), the others get
+// tag 1. After sorting the words, out[i] is the tag of word i minus 1, so
+// exactly w entries are nonzero (+1 or -1) and the rest are 0, at positions
+// determined by the upper bits of the input words.
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function
+//
+// hashPrefix writes to out the first 32 bytes of the SHA-512 hash of the
+// byte b followed by the first inlen bytes of in. Only the low 8 bits of b
+// are used.
+// e.g., b = 0 means out = Hash0(in)
+//
+// If in is shorter than inlen the missing bytes are hashed as zeros, and if
+// out is shorter than 32 bytes only len(out) bytes are written.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	x := make([]byte, inlen+1)
+	x[0] = byte(b)
+	copy(x[1:], in)
+	// Sum512 returns the digest by value, so no scratch buffer or hash.Hash
+	// object is needed.
+	h := sha512.Sum512(x)
+	copy(out, h[:32])
+// Higher level randomness
+//
+// urandom32 assembles the first four bytes of seed into an unsigned 32-bit
+// integer, least significant byte first. seed must hold at least 4 bytes.
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom generates a short polynomial from seed: it reads p 32-bit
+// words (4 bytes each) from seed and passes them to shortFromList.
+// seed must hold at least 4*p bytes.
+func shortRandom(out []small, seed []byte) {
+	L := make([]uint32, p)
+	for i := 0; i < p; i++ {
+		L[i] = urandom32(seed[4*i : 4*i+4])
+	}
+	// Converts uint32 array to int32 array
+	L_int32 := make([]int32, p)
+	for i := 0; i < len(L); i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// smallRandom generates a list of p small values from seed (4 bytes per
+// value). The low 30 bits of each 32-bit word are multiplied by 3 and
+// shifted right by 30, giving 0, 1 or 2, and 1 is subtracted to obtain a
+// value in {-1, 0, 1}.
+func smallRandom(out []small, seed []byte) {
+	for i := 0; i < p; i++ {
+		out[i] = small(((urandom32(seed[4*i:4*i+4])&0x3fffffff)*3)>>30) - 1
+	}
+// Streamlined NTRU Prime Core
+//
+// keyGen generates a core key pair: h is the public polynomial, and f and
+// ginv form the secret key.
+//
+// A random small polynomial g is drawn (4 seed bytes per coefficient) until
+// r3Recip reports success, which also stores its reciprocal in ginv. Then a
+// short polynomial f is drawn and h = g/(3f) is computed in Rq.
+// Randomness comes from gen when it is non-nil and from crypto/rand
+// otherwise; the function panics if crypto/rand reports an error, because
+// key material derived from an unfilled seed would be predictable.
+// h,(f,ginv) = keyGen()
+func keyGen(h []Fq, f []small, ginv []small, gen *nist.DRBG) {
+	g := make([]small, p)
+	// First half of seed: randomness for g. Second half: randomness for f.
+	seed := make([]byte, 4*p+4*p)
+	if gen == nil {
+		for {
+			if _, err := cryptoRand.Read(seed[:4*p]); err != nil {
+				panic(err)
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		if _, err := cryptoRand.Read(seed[4*p:]); err != nil {
+			panic(err)
+		}
+	} else {
+		for {
+			// The generator is queried four bytes at a time.
+			for i := 0; i < p; i++ {
+				gen.Fill(seed[4*i : 4*i+4])
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		for i := 0; i < p; i++ {
+			gen.Fill(seed[4*p+4*i : 4*p+4*i+4])
+		}
+	}
+	shortRandom(f, seed[4*p:])
+	finv := make([]Fq, p)
+	rqRecip3(finv, f) /* always works */
+	rqMultSmall(h, finv, g)
+// encrypt computes the ciphertext polynomial c = round(h*r), where the
+// product is taken in Rq.
+// c = encrypt(r,h)
+func encrypt(c []Fq, r []small, h []Fq) {
+	hr := make([]Fq, p)
+	rqMultSmall(hr, h, r)
+	round(c, hr)
+// decrypt recovers the input r from the ciphertext polynomial c with the
+// secret key (f, ginv): it computes 3*f*c in Rq, reduces the result to R3
+// and multiplies it by ginv. If the outcome has weight w it is returned in
+// r; otherwise r is set to a fixed polynomial whose first w coefficients
+// are 1 and whose remaining coefficients are 0. The selection uses the mask
+// from weightwMask rather than a branch.
+// r = decrypt(c,(f,ginv))
+func decrypt(r []small, c []Fq, f []small, ginv []small) {
+	cf := make([]Fq, p)
+	cf3 := make([]Fq, p)
+	e := make([]small, p)
+	ev := make([]small, p)
+	rqMultSmall(cf, c, f)
+	rqMult3(cf3, cf)
+	r3FromRq(e, cf3)
+	r3Mult(ev, e, ginv)
+	mask := weightwMask(ev) /* 0 if weight w, else -1 */
+	// ^mask is all ones on success and zero on failure.
+	for i := 0; i < w; i++ {
+		r[i] = ((ev[i] ^ 1) & small(^mask)) ^ 1
+	}
+	for i := w; i < p; i++ {
+		r[i] = ev[i] & small(^mask)
+	}
+// Encoding small polynomials (including short polynomials)
+//
+// smallEncode transforms the small polynomial f into bytes. Each
+// coefficient is stored as coefficient+1 in two bits, four coefficients per
+// byte starting at the lowest bits; the final coefficient is stored alone
+// in the last byte, so p/4+1 bytes of s are written.
+// smallEncode and smallDecode are the only functions that rely on
+// p mod 4 = 1.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		// x is an int8, so this term may wrap; byte(x) below still carries
+		// the intended bit pattern.
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode transforms bytes into a small polynomial, reversing the layout
+// written by smallEncode: every byte yields four coefficients (two bits
+// each, lowest bits first, minus 1) and the last byte yields the final
+// coefficient. p/4+1 bytes of s are read.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding general polynomials
+//
+// rqEncode transforms the polynomial r in R/q into bytes. Each coefficient
+// is shifted by q12 and the resulting values are handed to internal.Encode
+// with the per-coefficient value q.
+// NOTE(review): presumably coefficients are in [-q12, q12] so that the
+// shifted values are in [0, q) — confirm against internal.Encode.
+func rqEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16(r[i] + q12)
+		M[i] = q
+	}
+	internal.Encode(s, R, M, p)
+// rqDecode transforms bytes into a polynomial in R/q: it reads p values
+// from s with internal.Decode (per-coefficient value q) and subtracts q12
+// from each to obtain the coefficient.
+func rqDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = q
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = ((Fq)(R[i])) - q12
+	}
+// Encoding rounded polynomials
+//
+// roundedEncode serializes the p coefficients of the rounded polynomial r
+// into s. Each coefficient is shifted by q12 and divided by 3 through the
+// multiply-and-shift (x*10923)>>15 (10923 is roughly 2^15/3); the resulting
+// digits are handed to internal.Encode with the per-coefficient value
+// (q+2)/3.
+// NOTE(review): presumably r holds multiples of 3 in [-q12, q12] (as produced
+// by round) and s is roundedBytes long — confirm against the callers.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode is the counterpart of roundedEncode: it reads p digits from
+// s with internal.Decode (same per-coefficient value (q+2)/3) and maps each
+// digit d to the coefficient 3*d - q12, stored in r.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		// The subtraction is carried out in uint16 and may wrap around; the
+		// conversion to the signed Fq type reinterprets it as a negative value.
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Streamlined NTRU Prime Core plus encoding
+//
+// zKeyGen generates a core key pair with keyGen and serializes it: pk
+// receives the rqEncode-d polynomial h, and sk receives the smallEncode-d
+// polynomials f and v back to back (smallBytes bytes each).
+// gen is passed through to keyGen unchanged.
+// pk,sk = zKeyGen()
+func zKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	h := make([]Fq, p)
+	f := make([]small, p)
+	v := make([]small, p)
+	keyGen(h, f, v, gen)
+	rqEncode(pk, h)
+	smallEncode(sk, f)
+	// Advance past the encoding of f; v is stored right after it.
+	sk = sk[smallBytes:]
+	smallEncode(sk, v)
+// zEncrypt decodes the public key pk into the polynomial h, encrypts the
+// input r under h and writes the roundedEncode-d result to C.
+// C = zEncrypt(r,pk)
+func zEncrypt(C []byte, r Inputs, pk []byte) {
+	h := make([]Fq, p)
+	c := make([]Fq, p)
+	rqDecode(h, pk)
+	encrypt(c, r[:], h)
+	roundedEncode(C, c)
+// zDecrypt decodes f and v from the first 2*smallBytes bytes of sk, decodes
+// the ciphertext C and stores the output of decrypt in r.
+// r = zDecrypt(C,sk)
+func zDecrypt(r *Inputs, C []byte, sk []byte) {
+	f := make([]small, p)
+	v := make([]small, p)
+	c := make([]Fq, p)
+	smallDecode(f, sk)
+	// v is stored immediately after f.
+	sk = sk[smallBytes:]
+	smallDecode(v, sk)
+	roundedDecode(c, C)
+	decrypt(r[:], c, f, v)
+// Confirmation hash
+//
+// hashConfirm writes to h the value hashPrefix(2, hashPrefix(3, r) || cache),
+// where r is the inputsBytes-long encoded input and the first hashBytes
+// bytes of cache are used. The pk argument is not read by this function.
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, hashBytes*2)
+	hashPrefix(x, 3, r, inputsBytes)
+	copy(x[hashBytes:], cache[:hashBytes])
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+//
+// hashSession writes to k the value hashPrefix(b, hashPrefix(3, y) || z),
+// where y is the inputsBytes-long encoded input and z is the ciphertext
+// followed by the confirmation hash (ciphertextsBytes+confirmBytes bytes).
+// k = hashSession(b,y,z)
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, hashBytes+ciphertextsBytes+confirmBytes)
+	hashPrefix(x, 3, y, inputsBytes)
+	copy(x[hashBytes:], z[:ciphertextsBytes+confirmBytes])
+	hashPrefix(k, b, x, len(x))
+// Streamlined NTRU Prime
+//
+// kemKeyGen generates a KEM key pair. pk receives the encoded public key and
+// sk is filled with four consecutive segments:
+//
+//	secretKeysBytes  the encoded secret polynomials written by zKeyGen
+//	publicKeysBytes  a copy of pk
+//	inputsBytes      random bytes (rho), read back by DecapsulateTo
+//	hashBytes        hashPrefix(4, pk), the cached hash of the public key
+//
+// When gen is non-nil all randomness is drawn from it; otherwise crypto/rand
+// is used. The function panics if crypto/rand reports an error, because
+// continuing would silently leave rho as all zeros.
+// pk,sk = kemKeyGen()
+func kemKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	zKeyGen(pk, sk, gen)
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	if gen != nil {
+		gen.Fill(sk[:inputsBytes])
+	} else if _, err := cryptoRand.Read(sk[:inputsBytes]); err != nil {
+		panic(err)
+	}
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide encodes the input r into r_enc, encrypts r under pk into the first
+// ciphertextsBytes bytes of c, and writes the confirmation hash
+// hashConfirm(r_enc, pk, cache) right after the ciphertext.
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	smallEncode(r_enc, r[:])
+	zEncrypt(c, r, pk)
+	// Skip the ciphertext; the confirmation hash goes in the remaining bytes.
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo generates a shared key and its encapsulation for pub.
+//
+// Takes as input a public key
+// Returns ciphertext and shared key
+//
+// The ciphertext is written to c (CiphertextSize bytes) and the shared key
+// to k (SharedKeySize bytes). If seed is nil, 4*p fresh bytes are read from
+// crypto/rand; otherwise seed must be exactly 4*p (EncapsulationSeedSize)
+// bytes long and is the only source of randomness.
+//
+// It panics if seed, c or k has the wrong length, or if crypto/rand reports
+// an error (a failed read would otherwise leave an all-zero seed).
+// NOTE(review): the value receiver copies the whole public key on every
+// call, unlike the pointer receivers used by the other methods.
+// c,k = encap(pk)
+func (pub PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != 4*p {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	pk := pub.pk[:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	// cache = Hash4(pk), the value stored in private keys by kemKeyGen.
+	hashPrefix(cache, 4, pk, publicKeysBytes)
+	shortRandom(r[:], seed)
+	hide(c, r_enc, r, pk, cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertexts_diff_mask compares the first ciphertextsBytes+confirmBytes
+// bytes of c and c2 without branching on their contents.
+// Returns 0 if matching ciphertext+confirm, else -1
+func ciphertexts_diff_mask(c []byte, c2 []byte) int {
+	var differentbits uint16 = 0
+	n := ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16((c[i]) ^ (c2[i]))
+	}
+	// differentbits is in [0, 255]. Subtracting 1 wraps to 0xffff only when it
+	// is zero, so bit 8 of the difference is set exactly on a match. The
+	// result is widened through int16 so that a mismatch yields -1; converting
+	// the uint16 directly to int would give 65535 instead.
+	return int(int16((1 & ((differentbits - 1) >> 8)) - 1))
+// DecapsulateTo recovers the shared key for the ciphertext c and writes it
+// to k.
+//
+// Returns shared key from ciphertext and private key
+//
+// The input is decrypted and then re-encrypted with hide; the result is
+// compared with c by ciphertexts_diff_mask. On a mismatch the encoded input
+// is replaced by rho (the random segment stored in the private key) before
+// hashing, and the prefix passed to hashSession is 1+mask instead of 1.
+// It panics if c or k has the wrong length.
+// k = decap(c,sk)
+func (priv *PrivateKey) DecapsulateTo(k []byte, c []byte) {
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	// Split the private key into its segments (see kemKeyGen).
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, c, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertexts_diff_mask(c, cnew)
+	// Masked select: r_enc stays as is when mask is 0 and becomes rho when
+	// the low byte of mask is all ones.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	hashSession(k, 1+mask, r_enc, c)
+// PrivateKey is a packed private key.
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey is a packed public key.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme implements the KEM scheme methods for this parameter set; it
+// carries no state.
+type scheme struct{}
+// sch is the single scheme value handed out by Scheme and SntrupScheme.
+var sch sntrupKem.Scheme = &scheme{}
+// Scheme returns this parameter set as a generic kem.Scheme.
+func Scheme() kem.Scheme { return sch }
+// SntrupScheme returns the same scheme value typed as sntrupKem.Scheme.
+func SntrupScheme() sntrupKem.Scheme { return sch }
+// The methods below report the scheme name and the fixed sizes, in bytes,
+// of the objects handled by this parameter set. SeedSize reports
+// KeySeedSize, but note that DeriveKeyPair in this file ignores its seed.
+func (*scheme) Name() string               { return "sntrup1277" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the KEM scheme these keys belong to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+// MarshalBinary returns a fresh copy of the packed private key. The returned
+// error is always nil.
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a *PrivateKey of this package that holds
+// the same bytes as sk.
+// NOTE(review): bytes.Equal is not documented as constant-time; consider
+// crypto/subtle.ConstantTimeCompare for secret key material.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	oth, ok := other.(*PrivateKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(sk.sk[:], oth.sk[:])
+// Equal reports whether other is a *PublicKey of this package that holds
+// the same bytes as pk.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	oth, ok := other.(*PublicKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(pk.pk[:], oth.pk[:])
+// Public returns the public key embedded in the private key, i.e. the
+// publicKeysBytes bytes that follow the first secretKeysBytes bytes of sk.
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	// Copy straight from the key instead of going through MarshalBinary, which
+	// would create a second full copy of the secret key just to read pk.
+	copy(pk[:], sk.sk[secretKeysBytes:secretKeysBytes+publicKeysBytes])
+	return &PublicKey{pk: pk}
+// MarshalBinary returns a fresh copy of the packed public key. The returned
+// error is always nil.
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+// GenerateKeyPair generates a fresh key pair by calling kemKeyGen with a nil
+// generator. The returned error is always nil.
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+// DeriveKeyPair is not implemented for this scheme: it ignores seed and
+// returns nil keys. DeriveKeyPairFromGen is the variant that derives keys
+// from a caller-supplied generator.
+// Not used
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	return nil, nil
+// DeriveKeyPairFromGen generates a key pair by calling kemKeyGen with gen as
+// the generator. It panics if gen is nil.
+func (*scheme) DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey) {
+	if gen == nil {
+		panic("A nist DRBG must be provided")
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], gen)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate generates a shared key ss and its ciphertext ct for pk using
+// fresh randomness (EncapsulateTo with a nil seed). It returns
+// kem.ErrTypeMismatch if pk is not a *PublicKey of this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically is like Encapsulate but passes seed on to
+// EncapsulateTo. It returns kem.ErrTypeMismatch if pk is not a *PublicKey
+// of this package. EncapsulateTo panics if a non-nil seed does not have
+// length EncapsulationSeedSize, and falls back to crypto/rand when seed is
+// nil.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate returns the shared key for ciphertext ct under sk. It returns
+// kem.ErrTypeMismatch if sk is not a *PrivateKey of this package and
+// kem.ErrCiphertextSize if ct is not CiphertextSize bytes long.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	ssk, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := [SharedKeySize]byte{}
+	ssk.DecapsulateTo(ss[:], ct)
+	return ss[:], nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize if buf is not PublicKeySize bytes long; the contents of
+// buf are not validated any further here.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pk := [PublicKeySize]byte{}
+	copy(pk[:], buf)
+	return &PublicKey{pk: pk}, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize if buf is not PrivateKeySize bytes long; the contents
+// of buf are not validated any further here.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	sk := [PrivateKeySize]byte{}
+	copy(sk[:], buf)
+	return &PrivateKey{sk: sk}, nil
diff --git a/kem/ntruprime/sntrup653/ntruprime.go b/kem/ntruprime/sntrup653/ntruprime.go
new file mode 100644
index 000000000..e5d4aae79
--- /dev/null
+++ b/kem/ntruprime/sntrup653/ntruprime.go
@@ -0,0 +1,971 @@
+// Code generated from sntrup.templ.go. DO NOT EDIT.
+// Package sntrup653 implements the IND-CCA2 secure key encapsulation mechanism
+// sntrup653 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package sntrup653
+import (
+	"bytes"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	sntrupKem "github.com/cloudflare/circl/pke/ntruprime/kem"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/sntrup653"
+type (
+	small  int8 // coefficient of a small polynomial; the code stores the values -1, 0 and 1
+	Fq     int16 // element of GF(q), represented in the range -q12..q12
+	Inputs [p]small // small polynomial of p coefficients used as the encryption input
+const (
+	p            = ntrup.P // number of polynomial coefficients
+	q            = ntrup.Q // modulus of GF(q)
+	q12          = ((q - 1) / 2) // Fq values are kept in -q12..q12
+	roundedBytes = ntrup.RoundedBytes // length of an encoded rounded polynomial
+	rqBytes      = ntrup.RqBytes // length of an encoded polynomial with coefficients mod q
+	w            = ntrup.W // weight that a valid short polynomial must have
+	hashBytes = 32 // bytes kept from each SHA-512 digest
+	smallBytes = ((p + 3) / 4) // length of an encoded small polynomial (four coefficients per byte)
+	inputsBytes      = smallBytes
+	ciphertextsBytes = roundedBytes
+	secretKeysBytes  = (2 * smallBytes) // encoded f followed by encoded ginv
+	publicKeysBytes  = rqBytes
+	confirmBytes = 32 // length of the confirmation hash that follows the ciphertext
+const (
+	// KeySeedSize is the nominal size of a key-generation seed.
+	// Note that during keyGen a random small polynomial is generated until a valid one (whose reciprocal succeeds) is found,
+	// so the amount of randomness actually consumed depends on the number of times the reciprocal fails.
+	// This is why DeriveKeyPairFromGen is used to deterministically derive a key pair instead of a fixed-size seed.
+	KeySeedSize = 4*p + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = 4 * p
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+// Arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in (-1,0,1)
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+//
+// f3Freeze maps x to its representative in {-1, 0, 1}: it adds one, reduces
+// with internal.Int32ModUint14 (presumably a reduction mod 3 into 0..2 -
+// confirm against the internal package) and subtracts one again.
+// x must not be close to top int16
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic operations over GF(q)
+// Fq is always represented as -q12...q12
+// so ZZ_fromFq is a no-op
+//
+// fqFreeze maps x to its representative in -q12..q12: it adds q12, reduces
+// with internal.Int32ModUint14 (presumably a reduction mod q into 0..q-1 -
+// confirm against the internal package) and subtracts q12 again.
+// x must not be close to top int32
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// Calculates reciprocal of Fq
+func fqRecip(a1 Fq) Fq {
+	var i int = 1
+	ai := a1
+	for i < (q - 2) {
+		ai = fqFreeze(int32(a1) * int32(ai))
+		i += 1
+	}
+	return ai
+// Returns 0 if the weight w is equal to r
+// otherwise returns -1
+func weightwMask(r []small) int {
+	var weight int = 0
+	for i := 0; i < p; i++ {
+		weight += int(r[i]) & 1
+	}
+	// returns -1 if non zero
+	// otherwise returns 0 if weight==w
+	return internal.Int16NonzeroMask(int16(weight - w))
+// r3FromRq reduces each of the p coefficients of r (an element of Rq) mod 3
+// and stores the results in out: R3_fromR(R_fromRq(r)).
+func r3FromRq(out []small, r []Fq) {
+	for i := 0; i < p; i++ {
+		coeff := int16(r[i])
+		out[i] = f3Freeze(coeff)
+	}
+// r3Mult computes h = f*g in the ring R3: the product of two polynomials with
+// coefficients mod 3, reduced using x^p = x + 1.
+// f and g are read at indices 0..p-1; h receives p coefficients.
+func r3Mult(h []small, f []small, g []small) {
+	fg := make([]small, p+p-1) // unreduced product, degrees 0..2p-2
+	var result small
+	var i, j int
+	// Coefficients of degree 0..p-1.
+	for i = 0; i < p; i++ {
+		result = 0
+		for j = 0; j <= i; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients of degree p..2p-2.
+	for i = p; i < p+p-1; i++ {
+		result = 0
+		for j = i - p + 1; j < p; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Fold the high part back: x^i contributes to x^(i-p) and x^(i-p+1).
+	for i = p + p - 2; i >= p; i-- {
+		fg[i-p] = f3Freeze(int16(fg[i-p] + fg[i]))
+		fg[i-p+1] = f3Freeze(int16(fg[i-p+1] + fg[i]))
+	}
+	for i = 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// r3Recip calculates the reciprocal of the R3 polynomial in and writes its p
+// coefficients to out.
+// Returns 0 if recip succeeded; else -1
+// The main loop always runs 2*p-1 times and selects between cases with masks
+// instead of branches.
+func r3Recip(out []small, in []small) int {
+	// Working polynomials, each with p+1 coefficients.
+	f := make([]small, p+1)
+	g := make([]small, p+1)
+	v := make([]small, p+1)
+	r := make([]small, p+1)
+	var sign int
+	r[0] = 1
+	// f starts with coefficients 1 at index 0 and -1 at indices p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g starts as the input with its coefficient order reversed.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = in[i]
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position (multiply by x).
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		sign = int(-g[0] * f[0])
+		// swap is a mask: presumably all ones when delta > 0 and g[0] != 0, else 0.
+		var swap int = int(internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0])))
+		// Negate delta when swapping, then increment it.
+		delta ^= swap & int(delta^-delta)
+		delta += 1
+		// Masked exchange of (f, g) and of (v, r).
+		for i := 0; i < p+1; i++ {
+			t := swap & int(f[i]^g[i])
+			f[i] ^= small(t)
+			g[i] ^= small(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= small(t)
+			r[i] ^= small(t)
+		}
+		// Cancel the constant term of g, applying the same combination to r.
+		for i := 0; i < p+1; i++ {
+			g[i] = f3Freeze(int16(int(g[i]) + sign*int(f[i])))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = f3Freeze(int16(int(r[i]) + sign*int(v[i])))
+		}
+		// Shift g down by one position (divide by x).
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// Scale by f[0] and undo the coefficient reversal.
+	sign = int(f[0])
+	for i := 0; i < p; i++ {
+		out[i] = small(sign * int(v[p-1-i]))
+	}
+	// A final delta of zero signals success.
+	return internal.Int16NonzeroMask(int16(delta))
+// Polynomials mod q
+//
+// rqMultSmall computes h = f*g in the ring Rq, where f has coefficients in
+// GF(q) and g is a small polynomial. The product is reduced using
+// x^p = x + 1. f and g are read at indices 0..p-1; h receives p coefficients.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1) // unreduced product, degrees 0..2p-2
+	var result Fq
+	// Coefficients of degree 0..p-1.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients of degree p..2p-2.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Fold the high part back: x^i contributes to x^(i-p) and x^(i-p+1).
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// rqMult3 computes h = 3f in Rq for the first p coefficients of f.
+// Each coefficient is widened to int32 before it is multiplied, so the
+// product cannot wrap around in the 16-bit Fq type.
+func rqMult3(h []Fq, f []Fq) {
+	for i := 0; i < p; i++ {
+		h[i] = fqFreeze(3 * int32(f[i]))
+	}
+// rqRecip3 computes out = 1/(3*in) in Rq.
+// Returns 0 if recip succeeded; else -1
+// The main loop always runs 2*p-1 times and selects between cases with masks
+// instead of branches.
+func rqRecip3(out []Fq, in []small) int {
+	// Working polynomials, each with p+1 coefficients.
+	f := make([]Fq, p+1)
+	g := make([]Fq, p+1)
+	v := make([]Fq, p+1)
+	r := make([]Fq, p+1)
+	var swap, t int
+	var f0, g0 int32
+	// Starting r at 1/3 is what folds the factor 3 into the result.
+	r[0] = fqRecip(3)
+	// f starts with coefficients 1 at index 0 and -1 at indices p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g starts as the input with its coefficient order reversed.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = Fq(in[i])
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position (multiply by x).
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		// swap is a mask: presumably all ones when delta > 0 and g[0] != 0, else 0.
+		swap = internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0]))
+		// Negate delta when swapping, then increment it.
+		delta ^= swap & (delta ^ -delta)
+		delta += 1
+		// Masked exchange of (f, g) and of (v, r).
+		for i := 0; i < p+1; i++ {
+			t = swap & int(f[i]^g[i])
+			f[i] ^= Fq(t)
+			g[i] ^= Fq(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= Fq(t)
+			r[i] ^= Fq(t)
+		}
+		f0 = int32(f[0])
+		g0 = int32(g[0])
+		// Cancel the constant term of g, applying the same combination to r.
+		for i := 0; i < p+1; i++ {
+			g[i] = fqFreeze(f0*int32(g[i]) - g0*int32(f[i]))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = fqFreeze(f0*int32(r[i]) - g0*int32(v[i]))
+		}
+		// Shift g down by one position (divide by x).
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// Scale by 1/f[0] and undo the coefficient reversal.
+	scale := Fq(fqRecip(f[0]))
+	for i := 0; i < p; i++ {
+		out[i] = fqFreeze(int32(scale) * (int32)(v[p-1-i]))
+	}
+	// A final delta of zero signals success.
+	return internal.Int16NonzeroMask(int16(delta))
+// Rounded polynomials mod q
+//
+// round rounds each of the p coefficients of a to the nearest multiple of 3
+// by subtracting the coefficient's representative mod 3, and stores the
+// result in out.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		rem := f3Freeze(int16(a[i]))
+		out[i] = a[i] - Fq(rem)
+	}
+// Returns (min(x, y), max(x, y)), executes in constant time
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	c = -c
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order using a
+// fixed sequence of minmax compare-exchange steps that depends only on n.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// Largest power of two strictly below n.
+	top := 1
+	for top < n-top {
+		top <<= 1
+	}
+	for span := top; span > 0; span >>= 1 {
+		for i := 0; i < n-span; i++ {
+			if i&span != 0 {
+				continue
+			}
+			minmax(&x[i], &x[i+span])
+		}
+		for reach := top; reach > span; reach >>= 1 {
+			for i := 0; i < n-reach; i++ {
+				if i&span != 0 {
+					continue
+				}
+				minmax(&x[i+span], &x[i+reach])
+			}
+		}
+	}
+// Sorting to generate short polynomial
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function
+//
+// hashPrefix hashes the byte b followed by the first inlen bytes of in with
+// SHA-512 and copies the first 32 bytes of the digest to out.
+// e.g., b = 0 means out = Hash0(in)
+//
+// Only the low 8 bits of b are used. If in is shorter than inlen the missing
+// bytes are hashed as zeros.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	x := make([]byte, inlen+1)
+	x[0] = byte(b)
+	copy(x[1:], in)
+	digest := sha512.Sum512(x)
+	copy(out, digest[:32])
+// Higher level randomness
+// Returns a random unsigned integer
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom generates a short polynomial from seed: it reads p
+// little-endian 32-bit words (4*p bytes) and passes them to shortFromList.
+func shortRandom(out []small, seed []byte) {
+	words := make([]int32, p)
+	for i := range words {
+		words[i] = int32(urandom32(seed[4*i : 4*i+4]))
+	}
+	shortFromList(out, words)
+// smallRandom fills out with p coefficients in {-1, 0, 1}. Each coefficient
+// is derived from one 32-bit word of seed: the low 30 bits are multiplied by
+// 3 and shifted right by 30, giving 0..2, and one is subtracted.
+func smallRandom(out []small, seed []byte) {
+	for i := 0; i < p; i++ {
+		word := urandom32(seed[4*i:4*i+4]) & 0x3fffffff
+		out[i] = small((word*3)>>30) - 1
+	}
+// Streamlined NTRU Prime Core
+//
+// keyGen generates a key pair: h,(f,ginv) = keyGen().
+// It samples a random small polynomial g until g has a reciprocal in R3
+// (stored in ginv), samples a random short polynomial f, and sets
+// h = g/(3f) in Rq.
+//
+// When gen is nil the randomness comes from crypto/rand; otherwise it is
+// drawn from gen four bytes at a time. The function panics if crypto/rand
+// reports an error, because continuing would derive a key from a seed that
+// was never filled.
+func keyGen(h []Fq, f []small, ginv []small, gen *nist.DRBG) {
+	g := make([]small, p)
+	seed := make([]byte, 4*p+4*p) // first half seeds g, second half seeds f
+	if gen == nil {
+		for {
+			if _, err := cryptoRand.Read(seed[:4*p]); err != nil {
+				panic(err)
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		if _, err := cryptoRand.Read(seed[4*p:]); err != nil {
+			panic(err)
+		}
+	} else {
+		for {
+			for i := 0; i < p; i++ {
+				gen.Fill(seed[4*i : 4*i+4])
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		for i := 0; i < p; i++ {
+			gen.Fill(seed[4*p+4*i : 4*p+4*i+4])
+		}
+	}
+	shortRandom(f, seed[4*p:])
+	finv := make([]Fq, p)
+	rqRecip3(finv, f) /* always works */
+	rqMultSmall(h, finv, g)
+// encrypt computes c = encrypt(r,h): the product h*r in Rq with every
+// coefficient rounded to a multiple of 3.
+func encrypt(c []Fq, r []small, h []Fq) {
+	product := make([]Fq, p)
+	rqMultSmall(product, h, r)
+	round(c, product)
+// decrypt computes r = decrypt(c,(f,ginv)).
+// It forms 3*c*f in Rq, reduces it mod 3 and multiplies by ginv in R3. If the
+// result does not have weight w, r is replaced by a fixed polynomial whose
+// first w coefficients are 1 and whose remaining coefficients are 0; the
+// selection is done with masks rather than branches.
+func decrypt(r []small, c []Fq, f []small, ginv []small) {
+	cf := make([]Fq, p)
+	cf3 := make([]Fq, p)
+	e := make([]small, p)
+	ev := make([]small, p)
+	rqMultSmall(cf, c, f)
+	rqMult3(cf3, cf)
+	r3FromRq(e, cf3)
+	r3Mult(ev, e, ginv)
+	mask := weightwMask(ev) /* 0 if weight w, else -1 */
+	// With mask == 0 both loops copy ev; otherwise they write 1 resp. 0.
+	for i := 0; i < w; i++ {
+		r[i] = ((ev[i] ^ 1) & small(^mask)) ^ 1
+	}
+	for i := w; i < p; i++ {
+		r[i] = ev[i] & small(^mask)
+	}
+// Encoding small polynomials (including short polynomials)
+//
+// smallEncode transforms a polynomial in R to bytes. Each coefficient is
+// stored as coefficient+1 in two bits, four coefficients per byte with the
+// first one in the lowest bits; the last coefficient gets a byte of its own.
+// smallEncode and smallDecode are the only functions that rely on p mod 4 = 1.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Remaining single coefficient.
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode transforms bytes into a polynomial in R. Each byte yields four
+// coefficients, taken two bits at a time from the lowest bits upwards and
+// decremented by one; the final byte yields a single coefficient.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	// Remaining single coefficient.
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding general polynomials
+//
+// rqEncode transforms a polynomial in R/q to bytes. Each coefficient is
+// shifted by q12 to make it non-negative and handed to internal.Encode
+// together with the modulus q for every position.
+func rqEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16(r[i] + q12)
+		M[i] = q
+	}
+	internal.Encode(s, R, M, p)
+// rqDecode transforms bytes into a polynomial in R/q. It calls
+// internal.Decode with modulus q for every position and subtracts q12 from
+// each decoded value.
+func rqDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = q
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = ((Fq)(R[i])) - q12
+	}
+// Encoding rounded polynomials
+//
+// roundedEncode transforms a rounded polynomial to bytes. Each coefficient is
+// shifted by q12 and scaled down before being handed to internal.Encode with
+// modulus (q+2)/3 for every position.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		// 10923 is about 2^15/3, so this presumably divides by 3 without a division instruction.
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode transforms bytes into a rounded polynomial. It calls
+// internal.Decode with modulus (q+2)/3 for every position, multiplies each
+// decoded value by 3 and subtracts q12.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		// The subtraction may wrap in uint16; the conversion to Fq reinterprets it as a negative value.
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Streamlined NTRU Prime Core plus encoding
+//
+// zKeyGen generates a public key and a private key: pk,sk = zKeyGen().
+// pk receives the encoded h; sk receives the encoded f followed by the
+// encoded ginv (smallBytes each). gen is passed on to keyGen.
+func zKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	h := make([]Fq, p)
+	f := make([]small, p)
+	v := make([]small, p)
+	keyGen(h, f, v, gen)
+	rqEncode(pk, h)
+	smallEncode(sk, f)
+	sk = sk[smallBytes:]
+	smallEncode(sk, v)
+// zEncrypt computes C = zEncrypt(r,pk): it decodes the public key pk,
+// encrypts r with it and writes the encoded rounded result to C.
+func zEncrypt(C []byte, r Inputs, pk []byte) {
+	h := make([]Fq, p)
+	c := make([]Fq, p)
+	rqDecode(h, pk)
+	encrypt(c, r[:], h)
+	roundedEncode(C, c)
+// zDecrypt computes r = zDecrypt(C,sk): it decodes f and ginv from the first
+// 2*smallBytes bytes of sk, decodes the ciphertext C and decrypts it into r.
+func zDecrypt(r *Inputs, C []byte, sk []byte) {
+	f := make([]small, p)
+	v := make([]small, p)
+	c := make([]Fq, p)
+	smallDecode(f, sk)
+	sk = sk[smallBytes:]
+	smallDecode(v, sk)
+	roundedDecode(c, C)
+	decrypt(r[:], c, f, v)
+// Confirmation hash
+//
+// hashConfirm computes h = hashConfirm(r,pk,cache); cache is Hash4(pk).
+// It hashes r with prefix 3, appends the first hashBytes bytes of cache and
+// hashes the result with prefix 2. The pk argument itself is not read.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, hashBytes*2)
+	hashPrefix(x, 3, r, inputsBytes)
+	copy(x[hashBytes:], cache[:hashBytes])
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+//
+// hashSession computes k = hashSession(b,y,z). It hashes y with prefix 3,
+// appends the first ciphertextsBytes+confirmBytes bytes of z and hashes the
+// result with prefix b.
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, hashBytes+ciphertextsBytes+confirmBytes)
+	hashPrefix(x, 3, y, inputsBytes)
+	copy(x[hashBytes:], z[:ciphertextsBytes+confirmBytes])
+	hashPrefix(k, b, x, len(x))
+// Streamlined NTRU Prime
+//
+// kemKeyGen generates a KEM key pair: pk,sk = kemKeyGen().
+// sk is laid out as the encoded secret polynomials (secretKeysBytes), a copy
+// of pk (publicKeysBytes), inputsBytes random bytes, and Hash4(pk).
+//
+// The random bytes come from gen when it is non-nil and from crypto/rand
+// otherwise. The function panics if crypto/rand reports an error, because
+// those bytes would otherwise silently stay zero.
+func kemKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	zKeyGen(pk, sk, gen)
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	if gen != nil {
+		gen.Fill(sk[:inputsBytes])
+	} else if _, err := cryptoRand.Read(sk[:inputsBytes]); err != nil {
+		panic(err)
+	}
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide computes c,r_enc = hide(r,pk,cache); cache is Hash4(pk).
+// r_enc receives the encoded r. c receives the ciphertext of r under pk in
+// its first ciphertextsBytes bytes, followed by the confirmation hash.
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	smallEncode(r_enc, r[:])
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo takes a public key and writes a ciphertext to c and the
+// matching shared key to k: c,k = encap(pk).
+//
+// If seed is nil, 4*p random bytes are read from crypto/rand; otherwise seed
+// must be exactly EncapsulationSeedSize bytes long and fully determines the
+// output.
+//
+// It panics if seed, c or k has the wrong length, or if crypto/rand reports
+// an error (continuing would encapsulate with an all-zero seed).
+func (pub PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != 4*p {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	pk := pub.pk[:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	hashPrefix(cache, 4, pk, publicKeysBytes) // cache = Hash4(pk)
+	shortRandom(r[:], seed)
+	hide(c, r_enc, r, pk, cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertexts_diff_mask compares the first ciphertextsBytes+confirmBytes
+// bytes of c and c2 without branching on their contents.
+// Returns 0 if matching ciphertext+confirm, else -1
+func ciphertexts_diff_mask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	n := ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is at most 255, so bit 8 of differentbits-1 is set only
+	// when differentbits is zero. Widen to int before subtracting so that a
+	// mismatch yields -1 instead of wrapping to 65535 in uint16.
+	return int(1&((differentbits-1)>>8)) - 1
+// DecapsulateTo derives the shared key from ciphertext c and the private key
+// and writes it to k: k = decap(c,sk).
+//
+// It decrypts c, re-encrypts the result and compares the outcome with c. On
+// a mismatch the encoded plaintext is replaced by the random bytes rho stored
+// in the private key and a different hash prefix is used, so a key is
+// produced either way.
+//
+// It panics if c or k has the wrong length.
+func (priv *PrivateKey) DecapsulateTo(k []byte, c []byte) {
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	// Private key layout: secret polynomials, public key, rho, Hash4(pk).
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, c, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertexts_diff_mask(c, cnew)
+	// Masked select: keep r_enc when mask is 0, take rho otherwise.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	// hashPrefix keeps only the low byte of the prefix: 1 on a match, 0 otherwise.
+	hashSession(k, 1+mask, r_enc, c)
+// PrivateKey holds a packed private key. It consists of the following
+// segments, in order: the encoded secret polynomials, the public key, random
+// bytes used when decapsulation fails, and the hash of the public key.
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey holds a packed public key.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme is the stateless implementation of the KEM interfaces for sntrup653.
+type scheme struct{}
+// sch is the single shared scheme value returned by Scheme and SntrupScheme.
+var sch sntrupKem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+// SntrupScheme returns a sntrup.KEM interface
+func SntrupScheme() sntrupKem.Scheme { return sch }
+// Name and the size accessors below report the constants of this parameter set.
+func (*scheme) Name() string               { return "sntrup653" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the scheme a key belongs to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a *PrivateKey of this package holding the
+// same packed bytes as sk.
+// NOTE(review): bytes.Equal is not documented to run in constant time;
+// confirm that is acceptable for private-key comparison.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	oth, ok := other.(*PrivateKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(sk.sk[:], oth.sk[:])
+// Equal reports whether other is a *PublicKey of this package holding the
+// same packed bytes as pk.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	oth, ok := other.(*PublicKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(pk.pk[:], oth.pk[:])
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary()
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+// DeriveKeyPair is a stub: it ignores seed and always returns (nil, nil).
+// Use DeriveKeyPairFromGen for deterministic key generation.
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	return nil, nil
+func (*scheme) DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey) {
+	if gen == nil {
+		panic("A nist DRBG must be provided")
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], gen)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate produces a ciphertext and shared secret for pk by calling
+// EncapsulateTo with a nil seed. It returns kem.ErrTypeMismatch if pk is not
+// a *PublicKey of this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically produces a ciphertext and shared secret for pk
+// by calling EncapsulateTo with the caller-supplied seed. It returns
+// kem.ErrTypeMismatch if pk is not a *PublicKey of this package.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate recovers the shared secret carried by ct using private key sk.
+// It returns kem.ErrTypeMismatch if sk is not a *PrivateKey of this package
+// and kem.ErrCiphertextSize if ct is not CiphertextSize bytes long.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	switch {
+	case !ok:
+		return nil, kem.ErrTypeMismatch
+	case len(ct) != CiphertextSize:
+		return nil, kem.ErrCiphertextSize
+	}
+	shared := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(shared, ct)
+	return shared, nil
+// UnmarshalBinaryPublicKey copies a packed public key out of buf. It returns
+// kem.ErrPubKeySize unless buf is exactly PublicKeySize bytes long.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	key := new(PublicKey)
+	copy(key.pk[:], buf)
+	return key, nil
+// UnmarshalBinaryPrivateKey copies a packed private key out of buf. It returns
+// kem.ErrPrivKeySize unless buf is exactly PrivateKeySize bytes long.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	key := new(PrivateKey)
+	copy(key.sk[:], buf)
+	return key, nil
diff --git a/kem/ntruprime/sntrup761/ntruprime.go b/kem/ntruprime/sntrup761/ntruprime.go
new file mode 100644
index 000000000..1ea6b232d
--- /dev/null
+++ b/kem/ntruprime/sntrup761/ntruprime.go
@@ -0,0 +1,971 @@
+// Code generated from sntrup.templ.go. DO NOT EDIT.
+// Package sntrup761 implements the IND-CCA2 secure key encapsulation mechanism
+// sntrup761 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package sntrup761
+import (
+	"bytes"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	sntrupKem "github.com/cloudflare/circl/pke/ntruprime/kem"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/sntrup761"
+type (
+	small  int8 // coefficient of a small polynomial; the code stores the values -1, 0 and 1
+	Fq     int16 // element of GF(q), represented in the range -q12..q12
+	Inputs [p]small // small polynomial of p coefficients used as the encryption input
+const (
+	p            = ntrup.P // number of polynomial coefficients
+	q            = ntrup.Q // modulus of GF(q)
+	q12          = ((q - 1) / 2) // Fq values are kept in -q12..q12
+	roundedBytes = ntrup.RoundedBytes // length of an encoded rounded polynomial
+	rqBytes      = ntrup.RqBytes // length of an encoded polynomial with coefficients mod q
+	w            = ntrup.W // weight that a valid short polynomial must have
+	hashBytes = 32 // bytes kept from each SHA-512 digest
+	smallBytes = ((p + 3) / 4) // length of an encoded small polynomial (four coefficients per byte)
+	inputsBytes      = smallBytes
+	ciphertextsBytes = roundedBytes
+	secretKeysBytes  = (2 * smallBytes) // encoded f followed by encoded ginv
+	publicKeysBytes  = rqBytes
+	confirmBytes = 32 // length of a confirmation hash
+const (
+	// KeySeedSize is the nominal size of a key-generation seed.
+	// Note that during keyGen a random small polynomial is generated until a valid one (whose reciprocal succeeds) is found,
+	// so the amount of randomness actually consumed depends on the number of times the reciprocal fails.
+	// This is why DeriveKeyPairFromGen is used to deterministically derive a key pair instead of a fixed-size seed.
+	KeySeedSize = 4*p + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = 4 * p
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+// Arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in (-1,0,1)
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+//
+// f3Freeze maps x to its representative in {-1, 0, 1}: it adds one, reduces
+// with internal.Int32ModUint14 (presumably a reduction mod 3 into 0..2 -
+// confirm against the internal package) and subtracts one again.
+// x must not be close to top int16
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic operations over GF(q)
+// Fq is always represented as -q12...q12
+// so ZZ_fromFq is a no-op
+//
+// fqFreeze maps x to its representative in -q12..q12: it adds q12, reduces
+// with internal.Int32ModUint14 (presumably a reduction mod q into 0..q-1 -
+// confirm against the internal package) and subtracts q12 again.
+// x must not be close to top int32
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// Calculates reciprocal of Fq
+func fqRecip(a1 Fq) Fq {
+	var i int = 1
+	ai := a1
+	for i < (q - 2) {
+		ai = fqFreeze(int32(a1) * int32(ai))
+		i += 1
+	}
+	return ai
+// Returns 0 if the weight w is equal to r
+// otherwise returns -1
+func weightwMask(r []small) int {
+	var weight int = 0
+	for i := 0; i < p; i++ {
+		weight += int(r[i]) & 1
+	}
+	// returns -1 if non zero
+	// otherwise returns 0 if weight==w
+	return internal.Int16NonzeroMask(int16(weight - w))
+// r3FromRq reduces each of the p coefficients of r (an element of Rq) mod 3
+// and stores the results in out: R3_fromR(R_fromRq(r)).
+func r3FromRq(out []small, r []Fq) {
+	for i := 0; i < p; i++ {
+		coeff := int16(r[i])
+		out[i] = f3Freeze(coeff)
+	}
+// r3Mult computes h = f*g in the ring R3: the product of two polynomials with
+// coefficients mod 3, reduced using x^p = x + 1.
+// f and g are read at indices 0..p-1; h receives p coefficients.
+func r3Mult(h []small, f []small, g []small) {
+	fg := make([]small, p+p-1) // unreduced product, degrees 0..2p-2
+	var result small
+	var i, j int
+	// Coefficients of degree 0..p-1.
+	for i = 0; i < p; i++ {
+		result = 0
+		for j = 0; j <= i; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients of degree p..2p-2.
+	for i = p; i < p+p-1; i++ {
+		result = 0
+		for j = i - p + 1; j < p; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Fold the high part back: x^i contributes to x^(i-p) and x^(i-p+1).
+	for i = p + p - 2; i >= p; i-- {
+		fg[i-p] = f3Freeze(int16(fg[i-p] + fg[i]))
+		fg[i-p+1] = f3Freeze(int16(fg[i-p+1] + fg[i]))
+	}
+	for i = 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// r3Recip calculates the reciprocal of the R3 polynomial in and writes its p
+// coefficients to out.
+// Returns 0 if recip succeeded; else -1
+// The main loop always runs 2*p-1 times and selects between cases with masks
+// instead of branches.
+func r3Recip(out []small, in []small) int {
+	// Working polynomials, each with p+1 coefficients.
+	f := make([]small, p+1)
+	g := make([]small, p+1)
+	v := make([]small, p+1)
+	r := make([]small, p+1)
+	var sign int
+	r[0] = 1
+	// f starts with coefficients 1 at index 0 and -1 at indices p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g starts as the input with its coefficient order reversed.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = in[i]
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position (multiply by x).
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		sign = int(-g[0] * f[0])
+		// swap is a mask: presumably all ones when delta > 0 and g[0] != 0, else 0.
+		var swap int = int(internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0])))
+		// Negate delta when swapping, then increment it.
+		delta ^= swap & int(delta^-delta)
+		delta += 1
+		// Masked exchange of (f, g) and of (v, r).
+		for i := 0; i < p+1; i++ {
+			t := swap & int(f[i]^g[i])
+			f[i] ^= small(t)
+			g[i] ^= small(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= small(t)
+			r[i] ^= small(t)
+		}
+		// Cancel the constant term of g, applying the same combination to r.
+		for i := 0; i < p+1; i++ {
+			g[i] = f3Freeze(int16(int(g[i]) + sign*int(f[i])))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = f3Freeze(int16(int(r[i]) + sign*int(v[i])))
+		}
+		// Shift g down by one position (divide by x).
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// Scale by f[0] and undo the coefficient reversal.
+	sign = int(f[0])
+	for i := 0; i < p; i++ {
+		out[i] = small(sign * int(v[p-1-i]))
+	}
+	// A final delta of zero signals success.
+	return internal.Int16NonzeroMask(int16(delta))
+// Polynomials mod q
+//
+// rqMultSmall computes h = f*g in the ring Rq, where f has coefficients in
+// GF(q) and g is a small polynomial. The product is reduced using
+// x^p = x + 1. f and g are read at indices 0..p-1; h receives p coefficients.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1) // unreduced product, degrees 0..2p-2
+	var result Fq
+	// Coefficients of degree 0..p-1.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Coefficients of degree p..2p-2.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Fold the high part back: x^i contributes to x^(i-p) and x^(i-p+1).
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// rqMult3 computes h = 3f in Rq for the first p coefficients of f.
+// Each coefficient is widened to int32 before it is multiplied, so the
+// product cannot wrap around in the 16-bit Fq type.
+func rqMult3(h []Fq, f []Fq) {
+	for i := 0; i < p; i++ {
+		h[i] = fqFreeze(3 * int32(f[i]))
+	}
+// rqRecip3 computes out = 1/(3*in) in Rq.
+// Returns 0 if recip succeeded; else -1
+// The main loop always runs 2*p-1 times and selects between cases with masks
+// instead of branches.
+func rqRecip3(out []Fq, in []small) int {
+	// Working polynomials, each with p+1 coefficients.
+	f := make([]Fq, p+1)
+	g := make([]Fq, p+1)
+	v := make([]Fq, p+1)
+	r := make([]Fq, p+1)
+	var swap, t int
+	var f0, g0 int32
+	// Starting r at 1/3 is what folds the factor 3 into the result.
+	r[0] = fqRecip(3)
+	// f starts with coefficients 1 at index 0 and -1 at indices p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g starts as the input with its coefficient order reversed.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = Fq(in[i])
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position (multiply by x).
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		// swap is a mask: presumably all ones when delta > 0 and g[0] != 0, else 0.
+		swap = internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0]))
+		// Negate delta when swapping, then increment it.
+		delta ^= swap & (delta ^ -delta)
+		delta += 1
+		// Masked exchange of (f, g) and of (v, r).
+		for i := 0; i < p+1; i++ {
+			t = swap & int(f[i]^g[i])
+			f[i] ^= Fq(t)
+			g[i] ^= Fq(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= Fq(t)
+			r[i] ^= Fq(t)
+		}
+		f0 = int32(f[0])
+		g0 = int32(g[0])
+		// Cancel the constant term of g, applying the same combination to r.
+		for i := 0; i < p+1; i++ {
+			g[i] = fqFreeze(f0*int32(g[i]) - g0*int32(f[i]))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = fqFreeze(f0*int32(r[i]) - g0*int32(v[i]))
+		}
+		// Shift g down by one position (divide by x).
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// Scale by 1/f[0] and undo the coefficient reversal.
+	scale := Fq(fqRecip(f[0]))
+	for i := 0; i < p; i++ {
+		out[i] = fqFreeze(int32(scale) * (int32)(v[p-1-i]))
+	}
+	// A final delta of zero signals success.
+	return internal.Int16NonzeroMask(int16(delta))
+// Rounded polynomials mod q
+//
+// round rounds each of the p coefficients of a to the nearest multiple of 3
+// by subtracting the coefficient's representative mod 3, and stores the
+// result in out.
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		rem := f3Freeze(int16(a[i]))
+		out[i] = a[i] - Fq(rem)
+	}
+// Returns (min(x, y), max(x, y)), executes in constant time
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	c = -c
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order using a
+// fixed sequence of minmax compare-exchange steps that depends only on n.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// Largest power of two strictly below n.
+	top := 1
+	for top < n-top {
+		top <<= 1
+	}
+	for span := top; span > 0; span >>= 1 {
+		for i := 0; i < n-span; i++ {
+			if i&span != 0 {
+				continue
+			}
+			minmax(&x[i], &x[i+span])
+		}
+		for reach := top; reach > span; reach >>= 1 {
+			for i := 0; i < n-reach; i++ {
+				if i&span != 0 {
+					continue
+				}
+				minmax(&x[i+span], &x[i+reach])
+			}
+		}
+	}
+// Sorting to generate short polynomial
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// Underlying hash function
+//
+// hashPrefix hashes the byte b followed by the first inlen bytes of in with
+// SHA-512 and copies the first 32 bytes of the digest to out.
+// e.g., b = 0 means out = Hash0(in)
+//
+// Only the low 8 bits of b are used. If in is shorter than inlen the missing
+// bytes are hashed as zeros.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	x := make([]byte, inlen+1)
+	x[0] = byte(b)
+	copy(x[1:], in)
+	digest := sha512.Sum512(x)
+	copy(out, digest[:32])
+// Higher level randomness
+// Returns a random unsigned integer
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom generates a short polynomial from seed: it reads p
+// little-endian 32-bit words (4*p bytes) and passes them to shortFromList.
+func shortRandom(out []small, seed []byte) {
+	words := make([]int32, p)
+	for i := range words {
+		words[i] = int32(urandom32(seed[4*i : 4*i+4]))
+	}
+	shortFromList(out, words)
+// smallRandom fills out with p coefficients in {-1, 0, 1}. Each coefficient
+// is derived from one 32-bit word of seed: the low 30 bits are multiplied by
+// 3 and shifted right by 30, giving 0..2, and one is subtracted.
+func smallRandom(out []small, seed []byte) {
+	for i := 0; i < p; i++ {
+		word := urandom32(seed[4*i:4*i+4]) & 0x3fffffff
+		out[i] = small((word*3)>>30) - 1
+	}
+// Streamlined NTRU Prime Core
+//
+// keyGen generates a key pair: h,(f,ginv) = keyGen().
+// It samples a random small polynomial g until g has a reciprocal in R3
+// (stored in ginv), samples a random short polynomial f, and sets
+// h = g/(3f) in Rq.
+//
+// When gen is nil the randomness comes from crypto/rand; otherwise it is
+// drawn from gen four bytes at a time. The function panics if crypto/rand
+// reports an error, because continuing would derive a key from a seed that
+// was never filled.
+func keyGen(h []Fq, f []small, ginv []small, gen *nist.DRBG) {
+	g := make([]small, p)
+	seed := make([]byte, 4*p+4*p) // first half seeds g, second half seeds f
+	if gen == nil {
+		for {
+			if _, err := cryptoRand.Read(seed[:4*p]); err != nil {
+				panic(err)
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		if _, err := cryptoRand.Read(seed[4*p:]); err != nil {
+			panic(err)
+		}
+	} else {
+		for {
+			for i := 0; i < p; i++ {
+				gen.Fill(seed[4*i : 4*i+4])
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		for i := 0; i < p; i++ {
+			gen.Fill(seed[4*p+4*i : 4*p+4*i+4])
+		}
+	}
+	shortRandom(f, seed[4*p:])
+	finv := make([]Fq, p)
+	rqRecip3(finv, f) /* always works */
+	rqMultSmall(h, finv, g)
+// encrypt computes c = encrypt(r,h): the product h*r in Rq with every
+// coefficient rounded to a multiple of 3.
+func encrypt(c []Fq, r []small, h []Fq) {
+	product := make([]Fq, p)
+	rqMultSmall(product, h, r)
+	round(c, product)
+// decrypt computes r = decrypt(c,(f,ginv)).
+// It forms 3*c*f in Rq, reduces it mod 3 and multiplies by ginv in R3. If the
+// result does not have weight w, r is replaced by a fixed polynomial whose
+// first w coefficients are 1 and whose remaining coefficients are 0; the
+// selection is done with masks rather than branches.
+func decrypt(r []small, c []Fq, f []small, ginv []small) {
+	cf := make([]Fq, p)
+	cf3 := make([]Fq, p)
+	e := make([]small, p)
+	ev := make([]small, p)
+	rqMultSmall(cf, c, f)
+	rqMult3(cf3, cf)
+	r3FromRq(e, cf3)
+	r3Mult(ev, e, ginv)
+	mask := weightwMask(ev) /* 0 if weight w, else -1 */
+	// With mask == 0 both loops copy ev; otherwise they write 1 resp. 0.
+	for i := 0; i < w; i++ {
+		r[i] = ((ev[i] ^ 1) & small(^mask)) ^ 1
+	}
+	for i := w; i < p; i++ {
+		r[i] = ev[i] & small(^mask)
+	}
+// Encoding small polynomials (including short polynomials)
+//
+// smallEncode transforms a polynomial in R to bytes. Each coefficient is
+// stored as coefficient+1 in two bits, four coefficients per byte with the
+// first one in the lowest bits; the last coefficient gets a byte of its own.
+// smallEncode and smallDecode are the only functions that rely on p mod 4 = 1.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Remaining single coefficient.
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode transforms bytes into a polynomial in R. Each byte yields four
+// coefficients, taken two bits at a time from the lowest bits upwards and
+// decremented by one; the final byte yields a single coefficient.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	// Remaining single coefficient.
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding general polynomials
+// Transform polynomials in R/q to bytes: each coefficient is offset by q12 and handed to internal.Encode with modulus q
+func rqEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16(r[i] + q12)
+		M[i] = q
+	}
+	internal.Encode(s, R, M, p)
+// Transform polynomials in R/q from bytes: internal.Decode with modulus q, then q12 is subtracted from each value
+func rqDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = q
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = ((Fq)(R[i])) - q12
+	}
+// Encoding rounded polynomials
+// Transform rounded polynomials to bytes: offset values are scaled down by about 3 and encoded with modulus (q+2)/3
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15) // 10923/2^15 is roughly 1/3
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// Transform bytes to rounded polynomials: internal.Decode with modulus (q+2)/3, then each value is tripled and offset by -q12
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = Fq(R[i]*3 - q12) // evaluated in uint16; the conversion to Fq reinterprets a wrapped result as negative
+	}
+// Streamlined NTRU Prime Core plus encoding
+// Generates public key and private key
+// pk,sk = zKeyGen(): pk receives rqEncode(h); sk receives smallEncode(f) followed by smallEncode(v) at offset smallBytes
+func zKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	h := make([]Fq, p)
+	f := make([]small, p)
+	v := make([]small, p) // passed to keyGen as its ginv argument
+	keyGen(h, f, v, gen)
+	rqEncode(pk, h)
+	smallEncode(sk, f)
+	sk = sk[smallBytes:]
+	smallEncode(sk, v)
+// C = zEncrypt(r,pk): decodes pk with rqDecode, encrypts r and writes the roundedEncode of the result to C
+func zEncrypt(C []byte, r Inputs, pk []byte) {
+	h := make([]Fq, p)
+	c := make([]Fq, p)
+	rqDecode(h, pk)
+	encrypt(c, r[:], h)
+	roundedEncode(C, c)
+// r = zDecrypt(C,sk): decodes f and v from sk (v starts at offset smallBytes) and the ciphertext from C, then decrypts into r
+func zDecrypt(r *Inputs, C []byte, sk []byte) {
+	f := make([]small, p)
+	v := make([]small, p)
+	c := make([]Fq, p)
+	smallDecode(f, sk)
+	sk = sk[smallBytes:]
+	smallDecode(v, sk)
+	roundedDecode(c, C)
+	decrypt(r[:], c, f, v)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk). The pk argument itself is not read here, only cache.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, hashBytes*2)
+	hashPrefix(x, 3, r, inputsBytes) // hashPrefix with prefix 3 over r goes to the start of x
+	copy(x[hashBytes:], cache[:hashBytes])
+	hashPrefix(h, 2, x, len(x)) // hashPrefix with prefix 2 over x is the confirmation hash
+// Session-key hash
+// k = hashSession(b,y,z): hashPrefix with prefix b over the prefix-3 hash of y followed by the first ciphertextsBytes+confirmBytes bytes of z
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, hashBytes+ciphertextsBytes+confirmBytes)
+	hashPrefix(x, 3, y, inputsBytes)
+	copy(x[hashBytes:], z[:ciphertextsBytes+confirmBytes])
+	hashPrefix(k, b, x, len(x))
+// Streamlined NTRU Prime
+// pk,sk = kemKeyGen(); sk is filled segment by segment, the slice being advanced after each one
+func kemKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	zKeyGen(pk, sk, gen)
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk) // second segment: a copy of the public key
+	sk = sk[publicKeysBytes:]
+	if gen != nil {
+		gen.Fill(sk[:inputsBytes]) // third segment: inputsBytes of randomness (read back as rho in DecapsulateTo)
+	} else {
+		cryptoRand.Read(sk[:inputsBytes]) // NOTE(review): the error returned by Read is discarded
+	}
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes) // last segment: hashPrefix with prefix 4 over pk (read back as cache in DecapsulateTo)
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk). r_enc receives smallEncode(r); c receives the ciphertext followed by the confirmation hash
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	smallEncode(r_enc, r[:])
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:] // the confirmation hash is written after the first ciphertextsBytes bytes
+	hashConfirm(c, r_enc, pk, cache)
+// Takes as input a public key
+// Returns ciphertext and shared key in c and k; a nil seed is replaced by 4*p bytes from crypto/rand, and a wrong length of seed, c or k panics
+// c,k = encap(pk)
+func (pub PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		cryptoRand.Read(seed) // NOTE(review): the error returned by Read is discarded
+	}
+	if len(seed) != 4*p {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	pk := pub.pk[:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	hashPrefix(cache, 4, pk, publicKeysBytes) // recompute the prefix-4 hash of the public key
+	shortRandom(r[:], seed)
+	hide(c, r_enc, r, pk, cache)
+	hashSession(k, 1, r_enc, c) // session key is derived with prefix byte 1
+// Returns 0 if matching ciphertext+confirm, else -1; all ciphertextsBytes+confirmBytes bytes are examined wherever the inputs differ
+func ciphertexts_diff_mask(c []byte, c2 []byte) int {
+	var differentbits uint16 = 0 // OR of the XOR of every byte pair, so it stays within 0..255
+	var n int = ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16((c[i]) ^ (c2[i]))
+	}
+	return int(int16((1 & ((differentbits - 1) >> 8)) - 1)) // computed in uint16: a mismatch gives 0xFFFF, which int16 turns into -1 instead of 65535
+// Returns shared key from ciphertext and private key, written into k; panics if c or k has the wrong length
+// k = decap(c,sk)
+func (priv *PrivateKey) DecapsulateTo(k []byte, c []byte) {
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]  // stored public key
+	rho := pk[publicKeysBytes:] // random bytes stored after the public key
+	cache := rho[inputsBytes:]  // stored hash of the public key
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, c, sk)
+	hide(cnew, r_enc, r, pk, cache) // re-encrypt the decrypted input for comparison with c
+	var mask int = ciphertexts_diff_mask(c, cnew) // zero when c equals the recomputed cnew
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i])) // keeps r_enc on a match, otherwise switches to rho
+	}
+	hashSession(k, 1+mask, r_enc, c) // prefix is 1 on a match; hashPrefix uses only the low byte of this value
+// The private key buffer consists of the following segments, in order:
+// the secret key (encoded f and ginv), the public key, entropy (rho) and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+type PublicKey struct { // PublicKey wraps the encoded public key bytes
+	pk [PublicKeySize]byte
+type scheme struct{} // stateless implementation of the scheme interfaces
+var sch sntrupKem.Scheme = &scheme{} // single shared instance returned by Scheme and SntrupScheme
+// Scheme returns the package-level scheme instance as a kem.Scheme.
+func Scheme() kem.Scheme { return sch }
+// SntrupScheme returns the same instance as a sntrupKem.Scheme.
+func SntrupScheme() sntrupKem.Scheme { return sch }
+func (*scheme) Name() string               { return "sntrup761" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize } // NOTE(review): DeriveKeyPair does not consume a seed of this size; it returns nil
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) { // returns a fresh copy of the key bytes; the error is always nil
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool { // true only if other is a *PrivateKey holding identical bytes
+	oth, ok := other.(*PrivateKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(sk.sk[:], oth.sk[:]) // NOTE(review): bytes.Equal does not promise a constant-time comparison of this secret data
+func (pk *PublicKey) Equal(other kem.PublicKey) bool { // true only if other is a *PublicKey holding identical bytes
+	oth, ok := other.(*PublicKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(pk.pk[:], oth.pk[:])
+func (sk *PrivateKey) Public() kem.PublicKey { // returns the public key stored inside the private key
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary() // MarshalBinary always returns a nil error, so dropping it is safe
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) { // returns a fresh copy of the key bytes; the error is always nil
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) { // the returned error is always nil
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil) // nil DRBG: randomness comes from crypto/rand
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+// Not used: DeriveKeyPair ignores seed and returns nil keys; DeriveKeyPairFromGen performs deterministic derivation
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	return nil, nil
+func (*scheme) DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey) { // key pair whose randomness all comes from gen
+	if gen == nil {
+		panic("A nist DRBG must be provided")
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], gen)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) { // kem.ErrTypeMismatch unless pk is a *PublicKey of this package
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, nil) // nil seed: EncapsulateTo draws it from crypto/rand
+	return ct, ss, nil
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) { // kem.ErrTypeMismatch unless pk is a *PublicKey of this package
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, seed) // panics unless seed is nil or 4*p bytes long; a nil seed falls back to crypto/rand
+	return ct, ss, nil
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) { // validates the key type and ciphertext length, then decapsulates
+	ssk, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := [SharedKeySize]byte{}
+	ssk.DecapsulateTo(ss[:], ct) // the length checks above keep DecapsulateTo from panicking
+	return ss[:], nil
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) { // copies buf into a new PublicKey; only the length is checked
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pk := [PublicKeySize]byte{}
+	copy(pk[:], buf)
+	return &PublicKey{pk: pk}, nil
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) { // copies buf into a new PrivateKey; only the length is checked
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	sk := [PrivateKeySize]byte{}
+	copy(sk[:], buf)
+	return &PrivateKey{sk: sk}, nil
diff --git a/kem/ntruprime/sntrup857/ntruprime.go b/kem/ntruprime/sntrup857/ntruprime.go
new file mode 100644
index 000000000..35b2feb19
--- /dev/null
+++ b/kem/ntruprime/sntrup857/ntruprime.go
@@ -0,0 +1,971 @@
+// Code generated from sntrup.templ.go. DO NOT EDIT.
+// Package sntrup857 implements the IND-CCA2 secure key encapsulation mechanism
+// sntrup857 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package sntrup857
+import (
+	"bytes"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	sntrupKem "github.com/cloudflare/circl/pke/ntruprime/kem"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/sntrup857"
+type (
+	small  int8      // coefficient of a polynomial in R; GF(3) values are kept as -1,0,1
+	Fq     int16     // coefficient in GF(q), kept in -q12...q12
+	Inputs [p]small  // fixed-size array of p small coefficients
+const (
+	p            = ntrup.P
+	q            = ntrup.Q
+	q12          = ((q - 1) / 2) // GF(q) values are represented in -q12...q12
+	roundedBytes = ntrup.RoundedBytes
+	rqBytes      = ntrup.RqBytes
+	w            = ntrup.W
+	hashBytes = 32 // bytes of each SHA-512 digest kept by hashPrefix
+	smallBytes = ((p + 3) / 4) // smallEncode packs four coefficients per byte
+	inputsBytes      = smallBytes
+	ciphertextsBytes = roundedBytes
+	secretKeysBytes  = (2 * smallBytes) // two smallEncode outputs: f and ginv
+	publicKeysBytes  = rqBytes
+	confirmBytes = 32 // length of the confirmation hash appended to the ciphertext
+const (
+	// Nominal size of a key-generation seed, as reported by SeedSize (this file defines no NewKeyFromSeed)
+	// Note that during keyGen, a random small polynomial is generated until a valid one (whose reciprocal succeeds) is found
+	// The number of random bytes actually consumed therefore depends on the number of times the reciprocal fails
+	// This is why DeriveKeyPairFromGen is used to deterministically derive a key pair instead of using a seed
+	KeySeedSize = 4*p + p*4 + inputsBytes
+	// Size of seed for EncapsulateTo.
+	EncapsulationSeedSize = 4 * p
+	// Size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// Size of a ciphertext (the encapsulated shared key).
+	CiphertextSize = ntrup.CiphertextSize
+	// Size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// Size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+// Arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in {-1,0,1}
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+// f3Freeze passes x+1 to internal.Int32ModUint14 with modulus 3 and subtracts 1; x must not be close to top int16
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic operations over GF(q)
+/* always represented as -q12...q12 */
+/* so ZZ_fromFq is a no-op */
+/* fqFreeze passes x+q12 to internal.Int32ModUint14 with modulus q and subtracts q12; x must not be close to top int32 */
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// Calculates reciprocal of Fq as the power a1^(q-2), built up by q-3 successive multiplications by a1
+func fqRecip(a1 Fq) Fq {
+	var i int = 1
+	ai := a1 // holds a1 raised to the power i
+	for i < (q - 2) {
+		ai = fqFreeze(int32(a1) * int32(ai))
+		i += 1
+	}
+	return ai
+// Returns 0 if r has weight w, i.e. exactly w of its p coefficients have the low bit set,
+// otherwise returns -1
+func weightwMask(r []small) int {
+	var weight int = 0
+	for i := 0; i < p; i++ {
+		weight += int(r[i]) & 1 // adds 1 for a coefficient of -1 or +1, 0 for a coefficient of 0
+	}
+	// returns -1 if non zero
+	// otherwise returns 0 if weight==w
+	return internal.Int16NonzeroMask(int16(weight - w))
+/* R3_fromR(R_fromRq(r)): every coefficient of r is passed through f3Freeze */
+func r3FromRq(out []small, r []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = small(f3Freeze(int16(r[i])))
+	}
+// h = f*g in the ring R3; the full product is computed first and then folded using x^p = x + 1
+func r3Mult(h []small, f []small, g []small) {
+	fg := make([]small, p+p-1) // unreduced product with 2p-1 coefficients
+	var result small
+	var i, j int
+	for i = 0; i < p; i++ { // product coefficients 0..p-1
+		result = 0
+		for j = 0; j <= i; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	for i = p; i < p+p-1; i++ { // product coefficients p..2p-2
+		result = 0
+		for j = i - p + 1; j < p; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	for i = p + p - 2; i >= p; i-- { // add coefficient i into positions i-p and i-p+1, highest degree first
+		fg[i-p] = f3Freeze(int16(fg[i-p] + fg[i]))
+		fg[i-p+1] = f3Freeze(int16(fg[i-p+1] + fg[i]))
+	}
+	for i = 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// Calculates the reciprocal of R3 polynomials; the result is written into out
+// Returns 0 if recip succeeded; else -1
+func r3Recip(out []small, in []small) int {
+	// f, g, v and r are working arrays of p+1 coefficients; out is supplied by the caller
+	f := make([]small, p+1)
+	g := make([]small, p+1)
+	v := make([]small, p+1)
+	r := make([]small, p+1)
+	var sign int
+	r[0] = 1
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	for i := 0; i < p; i++ {
+		g[p-1-i] = in[i] // g holds the input with its coefficients in reverse order
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ { // fixed number of iterations, independent of the input
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		sign = int(-g[0] * f[0])
+		var swap int = int(internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0]))) // mask built from the sign of -delta and from g[0] being nonzero
+		delta ^= swap & int(delta^-delta) // replaces delta by -delta when swap has all bits set
+		delta += 1
+		for i := 0; i < p+1; i++ { // masked XOR exchange of f with g and of v with r, without branching
+			t := swap & int(f[i]^g[i])
+			f[i] ^= small(t)
+			g[i] ^= small(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= small(t)
+			r[i] ^= small(t)
+		}
+		for i := 0; i < p+1; i++ {
+			g[i] = f3Freeze(int16(int(g[i]) + sign*int(f[i])))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = f3Freeze(int16(int(r[i]) + sign*int(v[i])))
+		}
+		for i := 0; i < p; i++ { // shift g down by one position
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	sign = int(f[0])
+	for i := 0; i < p; i++ {
+		out[i] = small(sign * int(v[p-1-i])) // v is read back in reverse order and scaled by f[0]
+	}
+	return internal.Int16NonzeroMask(int16(delta)) // the result depends only on the final value of delta
+// Polynomials mod q
+// h = f*g in the ring Rq; the full product is computed first and then folded using x^p = x + 1
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1) // unreduced product with 2p-1 coefficients
+	var result Fq
+	for i := 0; i < p; i++ { // product coefficients 0..p-1
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	for i := p; i < p+p-1; i++ { // product coefficients p..2p-2
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	for i := p + p - 2; i >= p; i-- { // add coefficient i into positions i-p and i-p+1, highest degree first
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// h = 3f in Rq: each coefficient is tripled and reduced with fqFreeze
+func rqMult3(h []Fq, f []Fq) {
+	for i := 0; i < p; i++ {
+		h[i] = fqFreeze(int32(3 * f[i])) // the multiplication itself is carried out in Fq (int16) before widening
+	}
+// Returns 0 if recip succeeded; else -1
+// out = 1/(3*in) in Rq; the result is written into out
+func rqRecip3(out []Fq, in []small) int {
+	f := make([]Fq, p+1)
+	g := make([]Fq, p+1)
+	v := make([]Fq, p+1)
+	r := make([]Fq, p+1)
+	var swap, t int
+	var f0, g0 int32
+	r[0] = fqRecip(3) // starting r at 1/3 is what introduces the factor 3 of the result
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	for i := 0; i < p; i++ {
+		g[p-1-i] = Fq(in[i]) // g holds the input with its coefficients in reverse order
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ { // fixed number of iterations, independent of the input
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		swap = internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0])) // mask built from the sign of -delta and from g[0] being nonzero
+		delta ^= swap & (delta ^ -delta) // replaces delta by -delta when swap has all bits set
+		delta += 1
+		for i := 0; i < p+1; i++ { // masked XOR exchange of f with g and of v with r, without branching
+			t = swap & int(f[i]^g[i])
+			f[i] ^= Fq(t)
+			g[i] ^= Fq(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= Fq(t)
+			r[i] ^= Fq(t)
+		}
+		f0 = int32(f[0])
+		g0 = int32(g[0])
+		for i := 0; i < p+1; i++ {
+			g[i] = fqFreeze(f0*int32(g[i]) - g0*int32(f[i]))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = fqFreeze(f0*int32(r[i]) - g0*int32(v[i]))
+		}
+		for i := 0; i < p; i++ { // shift g down by one position
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	scale := Fq(fqRecip(f[0]))
+	for i := 0; i < p; i++ {
+		out[i] = fqFreeze(int32(scale) * (int32)(v[p-1-i])) // v is read back in reverse order and scaled by 1/f[0]
+	}
+	return internal.Int16NonzeroMask(int16(delta)) // the result depends only on the final value of delta
+// Rounding all coefficients of a polynomial to the nearest multiple of 3
+// Rounded polynomials mod q
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i]))) // subtracts the f3Freeze value of the coefficient from it
+	}
+// Replaces (*x, *y) with (min(x, y), max(x, y)), compared as unsigned values; uses no branches, executes in constant time
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31 // only the top bit is kept
+	c = -c // 0 stays 0, 1 becomes all ones
+	c &= xy // either 0 or xi^yi
+	*x = xi ^ c
+	*y = yi ^ c
+// Sorts the first n elements of x with minmax compare-exchange steps whose positions depend only on n, not on the data
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	top := 1
+	for top < n-top { // doubles top for as long as it is below n-top
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 { // this p shadows the package-level constant p
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 { // this q shadows the package-level constant q
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// Sorting to generate short polynomial: the low two bits of each entry tag it as nonzero (first w) or zero (rest) before sorting
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2)) // clears bit 0: the low two bits become 0 or 2
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1 // clears bit 1 and sets bit 0: the low two bits become 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1) // low two bits 0, 1, 2 map to -1, 0, +1
+	}
+// Underlying hash function
+// The input byte array, in, is prepended by the byte b
+// and its SHA-512 hash is calculated
+// Only the first 32 bytes of the hash are copied into out
+// e.g., b = 0 means out = Hash0(in)
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	x := make([]byte, inlen+1)
+	h := make([]byte, 64) // NOTE(review): this buffer is discarded when h is reassigned from hash.Sum below
+	x[0] = byte(b) // only the low 8 bits of b are used
+	copy(x[1:], in) // copies at most inlen bytes, the space left in x
+	hash := sha512.New()
+	hash.Write([]byte(x))
+	h = hash.Sum(nil)
+	copy(out, h[:32])
+// Higher level randomness
+// Returns the first four bytes of seed combined into an unsigned integer, least significant byte first
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// Generates a random short polynomial from 4*p seed bytes: one urandom32 value per coefficient, passed to shortFromList
+func shortRandom(out []small, seed []byte) {
+	L := make([]uint32, p)
+	for i := 0; i < p; i++ {
+		L[i] = urandom32(seed[4*i : 4*i+4])
+	}
+	// Converts uint32 array to int32 array, the element type shortFromList expects
+	L_int32 := make([]int32, p)
+	for i := 0; i < len(L); i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// Generates a random list of small from 4*p seed bytes, one coefficient per 4-byte group
+func smallRandom(out []small, seed []byte) {
+	for i := 0; i < p; i++ {
+		out[i] = small(((urandom32(seed[4*i:4*i+4])&0x3fffffff)*3)>>30) - 1 // 30 bits scaled to 0, 1 or 2, then shifted to -1, 0 or 1
+	}
+// Streamlined NTRU Prime Core: key generation
+// h,(f,ginv) = keyGen(); results are written into h, f and ginv. A nil gen draws randomness from crypto/rand, otherwise from gen.
+func keyGen(h []Fq, f []small, ginv []small, gen *nist.DRBG) {
+	g := make([]small, p)
+	seed := make([]byte, 4*p+4*p) // first 4*p bytes feed smallRandom (g), last 4*p bytes feed shortRandom (f)
+	if gen == nil {
+		for { // retry with fresh bytes until r3Recip reports 0 for g
+			cryptoRand.Read(seed[:4*p]) // NOTE(review): the error returned by Read is discarded
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		cryptoRand.Read(seed[4*p:]) // NOTE(review): the error returned by Read is discarded
+	} else {
+		for { // same retry loop, with the DRBG filled 4 bytes per call
+			for i := 0; i < p; i++ {
+				gen.Fill(seed[4*i : 4*i+4])
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		for i := 0; i < p; i++ {
+			gen.Fill(seed[4*p+4*i : 4*p+4*i+4])
+		}
+	}
+	shortRandom(f, seed[4*p:])
+	finv := make([]Fq, p)
+	rqRecip3(finv, f) /* always works */
+	rqMultSmall(h, finv, g) // public polynomial: product of finv and g
+// c = encrypt(r,h): multiplies h by r with rqMultSmall and stores the result of round in c
+func encrypt(c []Fq, r []small, h []Fq) {
+	hr := make([]Fq, p)
+	rqMultSmall(hr, h, r)
+	round(c, hr)
+// r = decrypt(c,(f,ginv)); the recovered input is written into r
+func decrypt(r []small, c []Fq, f []small, ginv []small) {
+	cf := make([]Fq, p)
+	cf3 := make([]Fq, p)
+	e := make([]small, p)
+	ev := make([]small, p)
+	rqMultSmall(cf, c, f)
+	rqMult3(cf3, cf)
+	r3FromRq(e, cf3)
+	r3Mult(ev, e, ginv)
+	mask := weightwMask(ev) /* 0 if weight w, else -1 */
+	for i := 0; i < w; i++ {
+		r[i] = ((ev[i] ^ 1) & small(^mask)) ^ 1 // ev[i] when mask is 0, 1 when mask is -1
+	}
+	for i := w; i < p; i++ {
+		r[i] = ev[i] & small(^mask) // ev[i] when mask is 0, 0 when mask is -1
+	}
+// Encoding small polynomials (including short polynomials)
+// Transform polynomial in R to bytes: four coefficients per byte, each stored as f+1 in two bits, lowest bits first
+// these are the only functions that rely on p mod 4 = 1
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6 // may wrap the int8; byte(x) below keeps the bit pattern
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	x = f[index] + 1 // the one coefficient left over gets a byte of its own
+	s[0] = byte(x)
+// Transform bytes into polynomial in R: each byte yields four coefficients, every 2-bit field minus 1
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	x = s[0] // the last byte carries a single coefficient in its two low bits
+	f[index] = ((small)(x & 3)) - 1
+// Encoding general polynomials
+// Transform polynomials in R/q to bytes: each coefficient is offset by q12 and handed to internal.Encode with modulus q
+func rqEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16(r[i] + q12)
+		M[i] = q
+	}
+	internal.Encode(s, R, M, p)
+// Transform polynomials in R/q from bytes: internal.Decode with modulus q, then q12 is subtracted from each value
+func rqDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = q
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = ((Fq)(R[i])) - q12
+	}
+// Encoding rounded polynomials
+// Transform rounded polynomials to bytes: offset values are scaled down by about 3 and encoded with modulus (q+2)/3
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15) // 10923/2^15 is roughly 1/3
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// Transform bytes to rounded polynomials: internal.Decode with modulus (q+2)/3, then each value is tripled and offset by -q12
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = Fq(R[i]*3 - q12) // evaluated in uint16; the conversion to Fq reinterprets a wrapped result as negative
+	}
+// Streamlined NTRU Prime Core plus encoding
+// Generates public key and private key
+// pk,sk = zKeyGen(): pk receives rqEncode(h); sk receives smallEncode(f) followed by smallEncode(v) at offset smallBytes
+func zKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	h := make([]Fq, p)
+	f := make([]small, p)
+	v := make([]small, p) // passed to keyGen as its ginv argument
+	keyGen(h, f, v, gen)
+	rqEncode(pk, h)
+	smallEncode(sk, f)
+	sk = sk[smallBytes:]
+	smallEncode(sk, v)
+// C = zEncrypt(r,pk): decodes pk with rqDecode, encrypts r and writes the roundedEncode of the result to C
+func zEncrypt(C []byte, r Inputs, pk []byte) {
+	h := make([]Fq, p)
+	c := make([]Fq, p)
+	rqDecode(h, pk)
+	encrypt(c, r[:], h)
+	roundedEncode(C, c)
+// r = zDecrypt(C,sk): decodes f and v from sk (v starts at offset smallBytes) and the ciphertext from C, then decrypts into r
+func zDecrypt(r *Inputs, C []byte, sk []byte) {
+	f := make([]small, p)
+	v := make([]small, p)
+	c := make([]Fq, p)
+	smallDecode(f, sk)
+	sk = sk[smallBytes:]
+	smallDecode(v, sk)
+	roundedDecode(c, C)
+	decrypt(r[:], c, f, v)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk). The pk argument itself is not read here, only cache.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, hashBytes*2)
+	hashPrefix(x, 3, r, inputsBytes) // hashPrefix with prefix 3 over r goes to the start of x
+	copy(x[hashBytes:], cache[:hashBytes])
+	hashPrefix(h, 2, x, len(x)) // hashPrefix with prefix 2 over x is the confirmation hash
+// Session-key hash
+// k = hashSession(b,y,z): hashPrefix with prefix b over the prefix-3 hash of y followed by the first ciphertextsBytes+confirmBytes bytes of z
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, hashBytes+ciphertextsBytes+confirmBytes)
+	hashPrefix(x, 3, y, inputsBytes)
+	copy(x[hashBytes:], z[:ciphertextsBytes+confirmBytes])
+	hashPrefix(k, b, x, len(x))
+// Streamlined NTRU Prime
+// pk,sk = kemKeyGen(); sk is filled segment by segment, the slice being advanced after each one
+func kemKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	zKeyGen(pk, sk, gen)
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk) // second segment: a copy of the public key
+	sk = sk[publicKeysBytes:]
+	if gen != nil {
+		gen.Fill(sk[:inputsBytes]) // third segment: inputsBytes of randomness (read back as rho in DecapsulateTo)
+	} else {
+		cryptoRand.Read(sk[:inputsBytes]) // NOTE(review): the error returned by Read is discarded
+	}
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes) // last segment: hashPrefix with prefix 4 over pk (read back as cache in DecapsulateTo)
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk). r_enc receives smallEncode(r); c receives the ciphertext followed by the confirmation hash
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	smallEncode(r_enc, r[:])
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:] // the confirmation hash is written after the first ciphertextsBytes bytes
+	hashConfirm(c, r_enc, pk, cache)
+// Takes as input a public key
+// Returns ciphertext and shared key in c and k; a nil seed is replaced by 4*p bytes from crypto/rand, and a wrong length of seed, c or k panics
+// c,k = encap(pk)
+func (pub PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		cryptoRand.Read(seed) // NOTE(review): the error returned by Read is discarded
+	}
+	if len(seed) != 4*p {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	pk := pub.pk[:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	hashPrefix(cache, 4, pk, publicKeysBytes) // recompute the prefix-4 hash of the public key
+	shortRandom(r[:], seed)
+	hide(c, r_enc, r, pk, cache)
+	hashSession(k, 1, r_enc, c) // session key is derived with prefix byte 1
+// Returns 0 if matching ciphertext+confirm, else -1; all ciphertextsBytes+confirmBytes bytes are examined wherever the inputs differ
+func ciphertexts_diff_mask(c []byte, c2 []byte) int {
+	var differentbits uint16 = 0 // OR of the XOR of every byte pair, so it stays within 0..255
+	var n int = ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16((c[i]) ^ (c2[i]))
+	}
+	return int(int16((1 & ((differentbits - 1) >> 8)) - 1)) // computed in uint16: a mismatch gives 0xFFFF, which int16 turns into -1 instead of 65535
+// Returns shared key from ciphertext and private key, written into k; panics if c or k has the wrong length
+// k = decap(c,sk)
+func (priv *PrivateKey) DecapsulateTo(k []byte, c []byte) {
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]  // stored public key
+	rho := pk[publicKeysBytes:] // random bytes stored after the public key
+	cache := rho[inputsBytes:]  // stored hash of the public key
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, c, sk)
+	hide(cnew, r_enc, r, pk, cache) // re-encrypt the decrypted input for comparison with c
+	var mask int = ciphertexts_diff_mask(c, cnew) // zero when c equals the recomputed cnew
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i])) // keeps r_enc on a match, otherwise switches to rho
+	}
+	hashSession(k, 1+mask, r_enc, c) // prefix is 1 on a match; hashPrefix uses only the low byte of this value
+// The private key buffer consists of the following segments, in order:
+// the secret key (encoded f and ginv), the public key, entropy (rho) and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+type PublicKey struct { // PublicKey wraps the encoded public key bytes
+	pk [PublicKeySize]byte
+type scheme struct{} // stateless implementation of the scheme interfaces
+var sch sntrupKem.Scheme = &scheme{} // single shared instance returned by Scheme and SntrupScheme
+// Scheme returns the package-level scheme instance as a kem.Scheme.
+func Scheme() kem.Scheme { return sch }
+// SntrupScheme returns the same instance as a sntrupKem.Scheme.
+func SntrupScheme() sntrupKem.Scheme { return sch }
+func (*scheme) Name() string               { return "sntrup857" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize } // NOTE(review): DeriveKeyPair does not consume a seed of this size; it returns nil
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) { // returns a fresh copy of the key bytes; the error is always nil
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool { // true only if other is a *PrivateKey holding identical bytes
+	oth, ok := other.(*PrivateKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(sk.sk[:], oth.sk[:]) // NOTE(review): bytes.Equal does not promise a constant-time comparison of this secret data
+func (pk *PublicKey) Equal(other kem.PublicKey) bool { // true only if other is a *PublicKey holding identical bytes
+	oth, ok := other.(*PublicKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(pk.pk[:], oth.pk[:])
+func (sk *PrivateKey) Public() kem.PublicKey { // returns the public key stored inside the private key
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary() // MarshalBinary always returns a nil error, so dropping it is safe
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+func (pk *PublicKey) MarshalBinary() ([]byte, error) { // returns a fresh copy of the key bytes; the error is always nil
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) { // the returned error is always nil
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil) // nil DRBG: randomness comes from crypto/rand
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+// Not used: DeriveKeyPair ignores seed and returns nil keys; DeriveKeyPairFromGen performs deterministic derivation
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	return nil, nil
+func (*scheme) DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey) { // key pair whose randomness all comes from gen
+	if gen == nil {
+		panic("A nist DRBG must be provided")
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], gen)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) { // kem.ErrTypeMismatch unless pk is a *PublicKey of this package
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, nil) // nil seed: EncapsulateTo draws it from crypto/rand
+	return ct, ss, nil
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) { // kem.ErrTypeMismatch unless pk is a *PublicKey of this package
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, seed) // panics unless seed is nil or 4*p bytes long; a nil seed falls back to crypto/rand
+	return ct, ss, nil
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) { // validates the key type and ciphertext length, then decapsulates
+	ssk, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := [SharedKeySize]byte{}
+	ssk.DecapsulateTo(ss[:], ct) // the length checks above keep DecapsulateTo from panicking
+	return ss[:], nil
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) { // copies buf into a new PublicKey; only the length is checked
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pk := [PublicKeySize]byte{}
+	copy(pk[:], buf)
+	return &PublicKey{pk: pk}, nil
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) { // copies buf into a new PrivateKey; only the length is checked
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	sk := [PrivateKeySize]byte{}
+	copy(sk[:], buf)
+	return &PrivateKey{sk: sk}, nil
diff --git a/kem/ntruprime/sntrup953/ntruprime.go b/kem/ntruprime/sntrup953/ntruprime.go
new file mode 100644
index 000000000..73bfc5ec7
--- /dev/null
+++ b/kem/ntruprime/sntrup953/ntruprime.go
@@ -0,0 +1,971 @@
+// Code generated from sntrup.templ.go. DO NOT EDIT.
+// Package sntrup953 implements the IND-CCA2 secure key encapsulation mechanism
+// sntrup953 as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package sntrup953
+import (
+	"bytes"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	sntrupKem "github.com/cloudflare/circl/pke/ntruprime/kem"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/sntrup953"
+type (
+	// small holds one coefficient of a small polynomial (-1, 0 or 1).
+	small  int8
+	// Fq holds one coefficient modulo q, kept in the range -q12..q12.
+	Fq     int16
+	// Inputs is a polynomial of p small coefficients.
+	Inputs [p]small
+// Parameters taken from the parameter-set package, plus derived byte sizes.
+const (
+	p            = ntrup.P
+	q            = ntrup.Q
+	// q12 is the largest magnitude of a frozen Fq value.
+	q12          = ((q - 1) / 2)
+	roundedBytes = ntrup.RoundedBytes
+	rqBytes      = ntrup.RqBytes
+	w            = ntrup.W
+	// hashBytes is the number of hash output bytes kept by hashPrefix callers.
+	hashBytes = 32
+	// smallBytes is the packed size of p small coefficients at four per byte.
+	smallBytes = ((p + 3) / 4)
+	inputsBytes      = smallBytes
+	ciphertextsBytes = roundedBytes
+	// The secret part of a private key holds two packed small polynomials.
+	secretKeysBytes  = (2 * smallBytes)
+	publicKeysBytes  = rqBytes
+	// confirmBytes is the size of the confirmation hash appended to a ciphertext.
+	confirmBytes = 32
+const (
+	// KeySeedSize is the value reported by the scheme's SeedSize method.
+	// During keyGen a random small polynomial is drawn repeatedly until one
+	// whose reciprocal exists is found, so the amount of randomness actually
+	// consumed depends on how many times the reciprocal fails. This is why
+	// DeriveKeyPairFromGen, rather than a fixed-size seed, is used to derive
+	// a key pair deterministically.
+	KeySeedSize = 4*p + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = 4 * p
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+// Arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in (-1,0,1)
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+//
+// f3Freeze reduces x to its representative in {-1, 0, 1}: it shifts x by 1,
+// reduces modulo 3 via internal.Int32ModUint14 (presumably yielding 0..2)
+// and shifts back.
+// x must not be close to top int16
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic operations over GF(q)
+/* always represented as -q12...q12 */
+/* so ZZ_fromFq is a no-op */
+//
+// fqFreeze reduces x to its representative in -q12..q12: it shifts x by
+// q12, reduces modulo q via internal.Int32ModUint14 (presumably yielding
+// 0..q-1) and shifts back.
+/* x must not be close to top int32 */
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// fqRecip calculates the reciprocal of a1 in Fq as a1^(q-2), using q-3
+// successive multiplications by a1. The iteration count depends only on q,
+// not on a1.
+func fqRecip(a1 Fq) Fq {
+	var i int = 1
+	ai := a1
+	for i < (q - 2) {
+		ai = fqFreeze(int32(a1) * int32(ai))
+		i += 1
+	}
+	return ai
+// weightwMask returns 0 if the weight of r equals w, otherwise -1.
+// The weight is the number of the first p coefficients whose low bit is
+// set, i.e. the coefficients equal to +1 or -1.
+func weightwMask(r []small) int {
+	var weight int = 0
+	for i := 0; i < p; i++ {
+		weight += int(r[i]) & 1
+	}
+	// returns -1 if non zero
+	// otherwise returns 0 if weight==w
+	return internal.Int16NonzeroMask(int16(weight - w))
+/* R3_fromR(R_fromRq(r)) */
+// r3FromRq reduces each of the p coefficients of r modulo 3 (into -1, 0, 1)
+// and stores the result in out.
+func r3FromRq(out []small, r []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = small(f3Freeze(int16(r[i])))
+	}
+// r3Mult computes h = f*g in the ring R3.
+// It forms the full schoolbook product (2p-1 coefficients, each frozen
+// into -1..1 as it accumulates) and then folds the high half back using
+// x^p = x + 1.
+func r3Mult(h []small, f []small, g []small) {
+	fg := make([]small, p+p-1)
+	var result small
+	var i, j int
+	// Low half of the product: coefficients 0..p-1.
+	for i = 0; i < p; i++ {
+		result = 0
+		for j = 0; j <= i; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// High half of the product: coefficients p..2p-2.
+	for i = p; i < p+p-1; i++ {
+		result = 0
+		for j = i - p + 1; j < p; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Reduce modulo x^p - x - 1: x^i contributes to x^(i-p) and x^(i-p+1).
+	for i = p + p - 2; i >= p; i-- {
+		fg[i-p] = f3Freeze(int16(fg[i-p] + fg[i]))
+		fg[i-p+1] = f3Freeze(int16(fg[i-p+1] + fg[i]))
+	}
+	for i = 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// r3Recip calculates the reciprocal of the R3 polynomial in and writes it
+// to out (p coefficients).
+// Returns 0 if recip succeeded; else -1
+//
+// The loop always runs 2*p-1 iterations and uses masks rather than
+// branches for the conditional swap, so control flow does not depend on
+// the coefficient values.
+func r3Recip(out []small, in []small) int {
+	// Working polynomials of p+1 coefficients each.
+	f := make([]small, p+1)
+	g := make([]small, p+1)
+	v := make([]small, p+1)
+	r := make([]small, p+1)
+	var sign int
+	r[0] = 1
+	// f starts as the modulus, g as the coefficient-reversed input.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	for i := 0; i < p; i++ {
+		g[p-1-i] = in[i]
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position (multiply by x).
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		sign = int(-g[0] * f[0])
+		// swap is all-ones when delta > 0 and g[0] != 0, else zero.
+		var swap int = int(internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0])))
+		// Negate delta when swapping, then increment.
+		delta ^= swap & int(delta^-delta)
+		delta += 1
+		// Masked exchange of (f, g) and (v, r).
+		for i := 0; i < p+1; i++ {
+			t := swap & int(f[i]^g[i])
+			f[i] ^= small(t)
+			g[i] ^= small(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= small(t)
+			r[i] ^= small(t)
+		}
+		// Cancel the constant term of g, mirroring the update on r.
+		for i := 0; i < p+1; i++ {
+			g[i] = f3Freeze(int16(int(g[i]) + sign*int(f[i])))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = f3Freeze(int16(int(r[i]) + sign*int(v[i])))
+		}
+		// Shift g down by one position (divide by x).
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// Scale by f[0] and undo the coefficient reversal.
+	sign = int(f[0])
+	for i := 0; i < p; i++ {
+		out[i] = small(sign * int(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// Polynomials mod q
+// rqMultSmall computes h = f*g in the ring Rq, where g is a small
+// polynomial. It forms the full schoolbook product (2p-1 coefficients,
+// frozen into -q12..q12 as it accumulates) and then folds the high half
+// back using x^p = x + 1.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	// Low half of the product: coefficients 0..p-1.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// High half of the product: coefficients p..2p-2.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Reduce modulo x^p - x - 1: x^i contributes to x^(i-p) and x^(i-p+1).
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// rqMult3 computes h = 3f in Rq, coefficient by coefficient.
+// The product 3*f[i] is formed in Fq (int16) before freezing.
+func rqMult3(h []Fq, f []Fq) {
+	for i := 0; i < p; i++ {
+		h[i] = fqFreeze(int32(3 * f[i]))
+	}
+// rqRecip3 computes out = 1/(3*in) in Rq.
+// Returns 0 if recip succeeded; else -1
+//
+// The structure mirrors r3Recip: a fixed 2*p-1 iterations with masked
+// swaps, so control flow does not depend on the coefficient values.
+func rqRecip3(out []Fq, in []small) int {
+	f := make([]Fq, p+1)
+	g := make([]Fq, p+1)
+	v := make([]Fq, p+1)
+	r := make([]Fq, p+1)
+	var swap, t int
+	var f0, g0 int32
+	// Starting r at 1/3 folds the factor 3 into the result.
+	r[0] = fqRecip(3)
+	// f starts as the modulus, g as the coefficient-reversed input.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	for i := 0; i < p; i++ {
+		g[p-1-i] = Fq(in[i])
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position (multiply by x).
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		// swap is all-ones when delta > 0 and g[0] != 0, else zero.
+		swap = internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0]))
+		// Negate delta when swapping, then increment.
+		delta ^= swap & (delta ^ -delta)
+		delta += 1
+		// Masked exchange of (f, g) and (v, r).
+		for i := 0; i < p+1; i++ {
+			t = swap & int(f[i]^g[i])
+			f[i] ^= Fq(t)
+			g[i] ^= Fq(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= Fq(t)
+			r[i] ^= Fq(t)
+		}
+		// Cancel the constant term of g by cross-multiplying with f,
+		// mirroring the update on r.
+		f0 = int32(f[0])
+		g0 = int32(g[0])
+		for i := 0; i < p+1; i++ {
+			g[i] = fqFreeze(f0*int32(g[i]) - g0*int32(f[i]))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = fqFreeze(f0*int32(r[i]) - g0*int32(v[i]))
+		}
+		// Shift g down by one position (divide by x).
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// Normalise by 1/f[0] and undo the coefficient reversal.
+	scale := Fq(fqRecip(f[0]))
+	for i := 0; i < p; i++ {
+		out[i] = fqFreeze(int32(scale) * (int32)(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// round rounds every coefficient of a to the nearest multiple of 3 by
+// subtracting its residue in {-1, 0, 1}, and stores the result in out.
+// Rounded polynomials mod q
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i])))
+	}
+// Returns (min(x, y), max(x, y)), executes in constant time
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	c = -c
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order.
+// The sequence of compare-exchange positions depends only on n, never on
+// the values, and every exchange goes through the branch-free minmax.
+// Note that the loop variables p and q shadow the package-level constants
+// of the same names inside this function.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// top is the smallest power of two with top >= n-top.
+	top := 1
+	for top < n-top {
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 {
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 {
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// shortFromList turns p 32-bit values into a short polynomial by sorting.
+// The low two bits of each value are used as a tag: the first w values are
+// tagged 0 or 2 (bit 0 cleared), the remaining p-w are tagged 1 (bit 1
+// cleared, bit 0 set). After sorting, tag-1 yields each coefficient, so
+// exactly w coefficients are -1 or +1 and the rest are 0, at positions
+// determined by the sorted order of the high bits.
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// hashPrefix is the underlying hash function.
+// It hashes the single byte b followed by the first inlen bytes of in
+// (zero-padded if in is shorter than inlen) with SHA-512 and copies the
+// first 32 bytes of the digest into out.
+// e.g., b = 0 means out = Hash0(in)
+//
+// Only the low 8 bits of b are used as the prefix byte.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	msg := make([]byte, inlen+1)
+	msg[0] = byte(b)
+	copy(msg[1:], in)
+	// One-shot hashing avoids the scratch buffer and hash.Hash allocation.
+	digest := sha512.Sum512(msg)
+	copy(out, digest[:32])
+// Higher level randomness
+// urandom32 assembles an unsigned 32-bit integer from the first four bytes
+// of seed, least-significant byte first. The randomness is whatever the
+// caller put into seed.
+func urandom32(seed []byte) uint32 {
+	var acc uint32
+	for i := 0; i < 4; i++ {
+		acc += uint32(seed[i]) << (8 * i)
+	}
+	return acc
+// shortRandom generates a short polynomial in out from seed.
+// seed is read as p little-endian 32-bit words, so it must hold at least
+// 4*p bytes; the words are then handed to shortFromList.
+func shortRandom(out []small, seed []byte) {
+	L := make([]uint32, p)
+	for i := 0; i < p; i++ {
+		L[i] = urandom32(seed[4*i : 4*i+4])
+	}
+	// Converts uint32 array to int32 array
+	L_int32 := make([]int32, p)
+	for i := 0; i < len(L); i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// smallRandom fills out with p small coefficients derived from seed.
+// Each coefficient uses one little-endian 32-bit word of seed (4*p bytes
+// in total): the low 30 bits are scaled by 3 and shifted down to give
+// 0, 1 or 2, which is then moved to -1, 0 or 1.
+func smallRandom(out []small, seed []byte) {
+	for i := 0; i < p; i++ {
+		out[i] = small(((urandom32(seed[4*i:4*i+4])&0x3fffffff)*3)>>30) - 1
+	}
+// Streamlined NTRU Prime Core
+// h,(f,ginv) = keyGen()
+//
+// keyGen draws a random small g until it is invertible in R3 (storing the
+// inverse in ginv), draws a random short f, and sets h = g/(3f) in Rq.
+// When gen is nil the randomness comes from crypto/rand; otherwise it is
+// pulled from gen four bytes at a time.
+// NOTE(review): the errors returned by cryptoRand.Read are ignored here.
+func keyGen(h []Fq, f []small, ginv []small, gen *nist.DRBG) {
+	g := make([]small, p)
+	// First 4*p bytes seed g, last 4*p bytes seed f.
+	seed := make([]byte, 4*p+4*p)
+	if gen == nil {
+		for {
+			cryptoRand.Read(seed[:4*p])
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		cryptoRand.Read(seed[4*p:])
+	} else {
+		for {
+			for i := 0; i < p; i++ {
+				gen.Fill(seed[4*i : 4*i+4])
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		for i := 0; i < p; i++ {
+			gen.Fill(seed[4*p+4*i : 4*p+4*i+4])
+		}
+	}
+	shortRandom(f, seed[4*p:])
+	finv := make([]Fq, p)
+	rqRecip3(finv, f) /* always works */
+	rqMultSmall(h, finv, g)
+// c = encrypt(r,h)
+// encrypt multiplies the public polynomial h by the small input r in Rq
+// and rounds every coefficient to a multiple of 3.
+func encrypt(c []Fq, r []small, h []Fq) {
+	hr := make([]Fq, p)
+	rqMultSmall(hr, h, r)
+	round(c, hr)
+// r = decrypt(c,(f,ginv))
+// decrypt computes e = 3*c*f in Rq reduced mod 3, then ev = e*ginv in R3.
+// If ev has weight w it is returned in r; otherwise r is set, without
+// branching, to the fixed vector of w ones followed by zeros.
+func decrypt(r []small, c []Fq, f []small, ginv []small) {
+	cf := make([]Fq, p)
+	cf3 := make([]Fq, p)
+	e := make([]small, p)
+	ev := make([]small, p)
+	rqMultSmall(cf, c, f)
+	rqMult3(cf3, cf)
+	r3FromRq(e, cf3)
+	r3Mult(ev, e, ginv)
+	mask := weightwMask(ev) /* 0 if weight w, else -1 */
+	// With mask == -1 the expression collapses to 1; with mask == 0 to ev[i].
+	for i := 0; i < w; i++ {
+		r[i] = ((ev[i] ^ 1) & small(^mask)) ^ 1
+	}
+	// With mask == -1 the expression collapses to 0; with mask == 0 to ev[i].
+	for i := w; i < p; i++ {
+		r[i] = ev[i] & small(^mask)
+	}
+// Encoding small polynomials (including short polynomials)
+// smallEncode transforms a polynomial in R to bytes: each coefficient is
+// stored as coefficient+1 in two bits, four coefficients per byte (lowest
+// bits first), and the last coefficient goes alone into a final byte.
+// It writes p/4 + 1 bytes to s.
+// these are the only functions that rely on p mod 4 = 1 */
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode transforms bytes into a polynomial in R, inverting
+// smallEncode: every byte yields four coefficients (two bits each, minus
+// one), and the low two bits of the final byte yield the last coefficient.
+// It reads p/4 + 1 bytes from s. A two-bit field of 3 decodes to 2; no
+// range check is performed here.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding general polynomials
+// rqEncode transforms a polynomial in R/q to bytes: every coefficient is
+// shifted by q12 into 0..q-1 and the list is handed to internal.Encode
+// with modulus q for each position.
+func rqEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16(r[i] + q12)
+		M[i] = q
+	}
+	internal.Encode(s, R, M, p)
+// rqDecode transforms bytes into a polynomial in R/q, inverting rqEncode:
+// internal.Decode is run with modulus q for each position and every value
+// is shifted back by q12.
+func rqDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = q
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = ((Fq)(R[i])) - q12
+	}
+// Encoding rounded polynomials
+// roundedEncode transforms a rounded polynomial to bytes. Every coefficient
+// is shifted by q12 and multiplied by 10923 then shifted right by 15
+// (10923/2^15 is approximately 1/3), and the results are handed to
+// internal.Encode with modulus (q+2)/3 for each position.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode transforms bytes to a rounded polynomial, inverting
+// roundedEncode: internal.Decode is run with modulus (q+2)/3 for each
+// position, and every value is multiplied by 3 and shifted back by q12.
+// The arithmetic is done in uint16 and then reinterpreted as Fq.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Streamlined NTRU Prime Core plus encoding
+// Generates public key and private key
+// pk,sk = zKeyGen()
+//
+// pk receives the encoded h; sk receives the packed f followed, smallBytes
+// later, by the packed inverse of g. gen is forwarded to keyGen.
+func zKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	h := make([]Fq, p)
+	f := make([]small, p)
+	v := make([]small, p)
+	keyGen(h, f, v, gen)
+	rqEncode(pk, h)
+	smallEncode(sk, f)
+	sk = sk[smallBytes:]
+	smallEncode(sk, v)
+// C = zEncrypt(r,pk)
+// zEncrypt decodes the public polynomial from pk, encrypts r under it and
+// writes the rounded-encoded ciphertext to C.
+func zEncrypt(C []byte, r Inputs, pk []byte) {
+	h := make([]Fq, p)
+	c := make([]Fq, p)
+	rqDecode(h, pk)
+	encrypt(c, r[:], h)
+	roundedEncode(C, c)
+// r = zDecrypt(C,sk)
+// zDecrypt unpacks the two small polynomials stored at the start of sk
+// (f, then the inverse of g), decodes the rounded ciphertext C and stores
+// the decryption in r.
+func zDecrypt(r *Inputs, C []byte, sk []byte) {
+	f := make([]small, p)
+	v := make([]small, p)
+	c := make([]Fq, p)
+	smallDecode(f, sk)
+	sk = sk[smallBytes:]
+	smallDecode(v, sk)
+	roundedDecode(c, C)
+	decrypt(r[:], c, f, v)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+//
+// h = Hash2(Hash3(r) || cache), where r is the encoded input of
+// inputsBytes bytes. The pk argument itself is not read; only cache is.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, hashBytes*2)
+	hashPrefix(x, 3, r, inputsBytes)
+	copy(x[hashBytes:], cache[:hashBytes])
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+//
+// k = Hash_b(Hash3(y) || z), where y is the encoded input (inputsBytes
+// bytes) and z is the ciphertext followed by its confirmation hash.
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, hashBytes+ciphertextsBytes+confirmBytes)
+	hashPrefix(x, 3, y, inputsBytes)
+	copy(x[hashBytes:], z[:ciphertextsBytes+confirmBytes])
+	hashPrefix(k, b, x, len(x))
+//  Streamlined NTRU Prime
+// pk,sk = kemKeyGen()
+//
+// kemKeyGen fills pk and sk. The private key is laid out as:
+// secret polynomials (secretKeysBytes) | copy of pk (publicKeysBytes) |
+// random rho (inputsBytes) | Hash4(pk).
+// rho comes from gen when it is non-nil, otherwise from crypto/rand.
+// NOTE(review): the error returned by cryptoRand.Read is ignored here.
+func kemKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	zKeyGen(pk, sk, gen)
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	if gen != nil {
+		gen.Fill(sk[:inputsBytes])
+	} else {
+		cryptoRand.Read(sk[:inputsBytes])
+	}
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+// hide packs r into r_enc, writes the encryption of r to the first
+// ciphertextsBytes bytes of c and the confirmation hash right after it.
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	smallEncode(r_enc, r[:])
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo takes a public key and writes the ciphertext to c and the
+// shared key to k.
+// c,k = encap(pk)
+//
+// If seed is nil, 4*p bytes are read from crypto/rand. The function panics
+// when seed is not 4*p bytes long, c is not CiphertextSize bytes or k is
+// not SharedKeySize bytes.
+// NOTE(review): the error returned by cryptoRand.Read is ignored here.
+func (pub PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		cryptoRand.Read(seed)
+	}
+	if len(seed) != 4*p {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	pk := pub.pk[:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	// cache = Hash4(pk), r = short polynomial derived from seed.
+	hashPrefix(cache, 4, pk, publicKeysBytes)
+	shortRandom(r[:], seed)
+	hide(c, r_enc, r, pk, cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertexts_diff_mask compares the first ciphertextsBytes+confirmBytes
+// bytes of c and c2 without branching on their contents.
+// Returns 0 if matching ciphertext+confirm, else -1
+//
+// The final subtraction is performed in int: doing it in uint16 would wrap
+// to 65535 on a mismatch instead of producing the documented -1.
+func ciphertexts_diff_mask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	n := ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is in 0..255; (differentbits-1)>>8 has its low bit set
+	// only when differentbits == 0 (the subtraction wraps to 0xFFFF).
+	equal := int(1 & ((differentbits - 1) >> 8))
+	return equal - 1
+// DecapsulateTo writes to k the shared key for ciphertext c under the
+// private key.
+// k = decap(c,sk)
+//
+// It decrypts c, re-encrypts the result and compares it with c. On a match
+// the session key is derived from the decrypted input with prefix byte 1;
+// on a mismatch the stored random rho replaces the input and the prefix
+// byte is 0. The selection is done with a mask instead of a branch.
+// It panics when c is not CiphertextSize bytes or k is not SharedKeySize
+// bytes.
+func (priv *PrivateKey) DecapsulateTo(k []byte, c []byte) {
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	// Private key layout: secret polynomials | pk | rho | Hash4(pk).
+	sk := priv.sk[:]
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	zDecrypt(&r, c, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertexts_diff_mask(c, cnew)
+	// mask is zero on a match; otherwise its low byte is all ones, so
+	// r_enc[i] becomes rho[i].
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	// hashPrefix keeps only the low byte of 1+mask: 1 on match, 0 otherwise.
+	hashSession(k, 1+mask, r_enc, c)
+// PrivateKey is a packed sntrup953 private key.
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey is a packed sntrup953 public key.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme is the stateless implementation of the KEM interfaces; sch is the
+// single shared instance returned by Scheme and SntrupScheme.
+type scheme struct{}
+var sch sntrupKem.Scheme = &scheme{}
+// Scheme returns the package's shared scheme instance as a kem.Scheme.
+func Scheme() kem.Scheme { return sch }
+// SntrupScheme returns the same shared scheme instance typed as
+// sntrupKem.Scheme.
+func SntrupScheme() sntrupKem.Scheme { return sch }
+// Constant accessors: the scheme name and the package-level size constants.
+func (*scheme) Name() string               { return "sntrup953" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the scheme a key belongs to (the package's shared instance).
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+// MarshalBinary returns a fresh copy of the packed private key bytes.
+// The returned error is always nil.
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	var ret [PrivateKeySize]byte
+	copy(ret[:], sk.sk[:])
+	return ret[:], nil
+// Equal reports whether other is a *PrivateKey of this package holding the
+// same bytes. The comparison uses bytes.Equal.
+// NOTE(review): bytes.Equal is not documented as constant-time; confirm
+// whether that matters for secret key material here.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	oth, ok := other.(*PrivateKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(sk.sk[:], oth.sk[:])
+// Equal reports whether other is a *PublicKey of this package holding the
+// same bytes.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	oth, ok := other.(*PublicKey)
+	if !ok {
+		return false
+	}
+	return bytes.Equal(pk.pk[:], oth.pk[:])
+// Public returns the public key embedded in the private key, i.e. the
+// publicKeysBytes bytes that follow the secret polynomials.
+func (sk *PrivateKey) Public() kem.PublicKey {
+	var pk [PublicKeySize]byte
+	skey, _ := sk.MarshalBinary() // MarshalBinary never returns an error
+	ppk := skey[secretKeysBytes : secretKeysBytes+publicKeysBytes]
+	copy(pk[:], ppk[:])
+	return &PublicKey{pk: pk}
+// MarshalBinary returns a fresh copy of the packed public key bytes.
+// The returned error is always nil.
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	var ret [PublicKeySize]byte
+	copy(ret[:], pk.pk[:])
+	return ret[:], nil
+// GenerateKeyPair generates a fresh key pair. kemKeyGen is called with a
+// nil generator, so randomness comes from crypto/rand. The returned error
+// is always nil.
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], nil)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}, nil
+// DeriveKeyPair is not used by this scheme: it ignores seed and always
+// returns (nil, nil). Key generation consumes a variable amount of
+// randomness, so DeriveKeyPairFromGen is the deterministic entry point.
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	return nil, nil
+// DeriveKeyPairFromGen generates a key pair drawing all randomness from
+// the supplied NIST DRBG, so the same generator state yields the same key
+// pair. It panics if gen is nil.
+func (*scheme) DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey) {
+	if gen == nil {
+		panic("A nist DRBG must be provided")
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], gen)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate produces a ciphertext ct and shared secret ss for pk using
+// fresh randomness (a nil seed is handed to EncapsulateTo). It returns
+// kem.ErrTypeMismatch when pk is not a *PublicKey of this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically is like Encapsulate but derives the
+// encapsulation from seed. EncapsulateTo panics unless seed is nil or
+// exactly 4*p bytes long. It returns kem.ErrTypeMismatch when pk is not a
+// *PublicKey of this package.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate recovers the shared secret for ciphertext ct under sk.
+// It returns kem.ErrTypeMismatch when sk is not a *PrivateKey of this
+// package and kem.ErrCiphertextSize when ct is not CiphertextSize bytes.
+// An invalid ciphertext of the right size does not produce an error;
+// DecapsulateTo derives a key from the private key's rho instead.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	ssk, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	ss := [SharedKeySize]byte{}
+	ssk.DecapsulateTo(ss[:], ct)
+	return ss[:], nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize when buf is not exactly PublicKeySize bytes; the
+// contents are not validated any further here.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pk := [PublicKeySize]byte{}
+	copy(pk[:], buf)
+	return &PublicKey{pk: pk}, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize when buf is not exactly PrivateKeySize bytes; the
+// contents are not validated any further here.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	sk := [PrivateKeySize]byte{}
+	copy(sk[:], buf)
+	return &PrivateKey{sk: sk}, nil
diff --git a/kem/ntruprime/templates/ntrulpr.templ.go b/kem/ntruprime/templates/ntrulpr.templ.go
new file mode 100644
index 000000000..0556688dc
--- /dev/null
+++ b/kem/ntruprime/templates/ntrulpr.templ.go
@@ -0,0 +1,837 @@
+// +build ignore
+// The previous line (and this one up to the warning below) is removed by the
+// template generator.
+// Code generated from ntrulpr.templ.go. DO NOT EDIT.
+// Package {{.Pkg}} implements the IND-CCA2 secure key encapsulation mechanism
+// {{.Pkg}} as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package {{.Pkg}}
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/{{.Pkg}}"
+// Parameters taken from the parameter-set package, plus derived byte sizes.
+const (
+	p             = ntrup.P
+	q             = ntrup.Q
+	// q12 is the largest magnitude of a frozen Fq value.
+	q12           = ((q - 1) / 2)
+	roundedBytes = ntrup.RoundedBytes
+	w    = ntrup.W
+	// tau0..tau3 are the constants used by top and right.
+	tau0 = ntrup.Tau0
+	tau1 = ntrup.Tau1
+	tau2 = ntrup.Tau2
+	tau3 = ntrup.Tau3
+	// I is the number of input bits.
+	I = ntrup.I
+	// hashBytes is the number of hash output bytes kept by hashPrefix callers.
+	hashBytes = 32
+	// smallBytes is the packed size of p small coefficients at four per byte.
+	smallBytes = ((p + 3) / 4)
+	// Inputs are packed at eight bits per byte.
+	inputsBytes      = I / 8
+	seedBytes        = 32
+	ciphertextsBytes = roundedBytes + topBytes
+	secretKeysBytes  = smallBytes
+	publicKeysBytes  = seedBytes + roundedBytes
+	// confirmBytes is the size of the confirmation hash appended to a ciphertext.
+	confirmBytes = 32
+	tau = 16
+	// Top values are packed at two per byte (see topEncode).
+	topBytes = I / 2
+const (
+	// KeySeedSize is the seed length required by kemKeyGen: seedBytes for
+	// the generator seed, 4*p for the short secret and inputsBytes for rho.
+	KeySeedSize           = seedBytes + p*4 + inputsBytes
+	// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+	EncapsulationSeedSize = inputsBytes
+	// SharedKeySize is the size of the established shared key.
+	SharedKeySize = ntrup.SharedKeySize
+	// CiphertextSize is the size of the encapsulated shared key.
+	CiphertextSize = ntrup.CiphertextSize
+	// PublicKeySize is the size of a packed public key.
+	PublicKeySize = ntrup.PublicKeySize
+	// PrivateKeySize is the size of a packed private key.
+	PrivateKeySize = ntrup.PrivateKeySize
+type (
+	// small holds one coefficient of a small polynomial (-1, 0 or 1).
+	small int8
+	// Fq holds one coefficient modulo q, kept in the range -q12..q12.
+	Fq int16
+// arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in (-1,0,1)
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+//
+// f3Freeze reduces x to its representative in {-1, 0, 1}: it shifts x by 1,
+// reduces modulo 3 via internal.Int32ModUint14 (presumably yielding 0..2)
+// and shifts back.
+// x must not be close to top int16
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+/* ----- arithmetic mod q */
+// GF (q)
+/* always represented as -q12...q12 */
+/* so ZZ_fromFq is a no-op */
+//
+// fqFreeze reduces x to its representative in -q12..q12: it shifts x by
+// q12, reduces modulo q via internal.Int32ModUint14 (presumably yielding
+// 0..q-1) and shifts back.
+/* x must not be close to top int32 */
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// top maps a coefficient C to a small integer: it computes
+// (tau1*(C+tau0) + 16384) >> 15, i.e. a scaled value rounded at 2^15.
+// Note that C+tau0 is added in Fq (int16) before widening to int32.
+func top(C Fq) int8 {
+	return int8((tau1*(int32)(C+tau0) + 16384) >> 15)
+// right maps a top value T back into Fq as tau3*T - tau2, frozen into
+// -q12..q12.
+func right(T int8) Fq {
+	return fqFreeze(tau3*int32(T) - tau2)
+// Polynomials mod q
+// rqMultSmall computes h = f*g in the ring Rq, where g is a small
+// polynomial. It forms the full schoolbook product (2p-1 coefficients,
+// frozen into -q12..q12 as it accumulates) and then folds the high half
+// back using x^p = x + 1.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	// Low half of the product: coefficients 0..p-1.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// High half of the product: coefficients p..2p-2.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Reduce modulo x^p - x - 1: x^i contributes to x^(i-p) and x^(i-p+1).
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// round rounds every coefficient of a to the nearest multiple of 3 by
+// subtracting its residue in {-1, 0, 1}, and stores the result in out.
+// Rounded polynomials mod q
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i])))
+	}
+// Returns (min(x, y), max(x, y)), executes in constant time
+func minmax(x, y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	c >>= 31
+	c = -c
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in ascending order.
+// The sequence of compare-exchange positions depends only on n, never on
+// the values, and every exchange goes through the branch-free minmax.
+// Note that the locals top, p and q shadow the package-level function and
+// constants of the same names inside this function.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// top is the smallest power of two with top >= n-top.
+	top := 1
+	for top < n-top {
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 {
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 {
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// shortFromList turns p 32-bit values into a short polynomial by sorting.
+// The low two bits of each value are used as a tag: the first w values are
+// tagged 0 or 2 (bit 0 cleared), the remaining p-w are tagged 1 (bit 1
+// cleared, bit 0 set). After sorting, tag-1 yields each coefficient, so
+// exactly w coefficients are -1 or +1 and the rest are 0, at positions
+// determined by the sorted order of the high bits.
+func shortFromList(out[]small,in []int32)  {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		out[i] = small((L[i] & 3) - 1)
+	}
+// hashPrefix is the underlying hash function.
+// It hashes the single byte b followed by the first inlen bytes of in
+// (zero-padded if in is shorter than inlen) with SHA-512 and copies the
+// first 32 bytes of the digest into out.
+// e.g., b = 0 means out = Hash0(in)
+//
+// Only the low 8 bits of b are used as the prefix byte.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	msg := make([]byte, inlen+1)
+	msg[0] = byte(b)
+	copy(msg[1:], in)
+	digest := sha512.Sum512(msg)
+	copy(out, digest[:32])
+// Higher level randomness
+// urandom32 assembles an unsigned 32-bit integer from the first four bytes
+// of seed, least-significant byte first. The randomness is whatever the
+// caller put into seed, so a fixed seed gives a deterministic result.
+func urandom32(seed []byte) uint32 {
+	var acc uint32
+	for i := 0; i < 4; i++ {
+		acc += uint32(seed[i]) << (8 * i)
+	}
+	return acc
+// shortRandom generates a short polynomial in out: exactly w coefficients
+// are -1 or +1 and the remaining p-w are 0 (see shortFromList).
+//
+// seed supplies the randomness as p little-endian 32-bit words, so a
+// non-nil seed must hold at least 4*p bytes. When seed is nil, 4*p fresh
+// bytes are drawn from crypto/rand instead (calling urandom32 on a nil
+// slice would index out of range and panic). It panics if crypto/rand
+// fails.
+func shortRandom(out []small, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, 4*p)
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	// Reinterpret each 32-bit word as int32, the element type that
+	// shortFromList expects.
+	words := make([]int32, p)
+	for i := range words {
+		words[i] = int32(urandom32(seed[i*4 : i*4+4]))
+	}
+	shortFromList(out, words)
+// NTRU LPRime Core
+// (G,A),a = keyGen(G); leaves G unchanged
+//
+// keyGen derives the short secret a from seed and sets A = round(a*G).
+func keyGen(A []Fq, a []small, G []Fq, seed []byte) {
+	aG := make([]Fq, p)
+	shortRandom(a,seed)
+	rqMultSmall(aG, G, a)
+	round(A,aG)
+// B,T = encrypt(r,(G,A),b)
+// encrypt sets B = round(b*G) and, for each of the I input bits, sets
+// T[i] = top(b*A[i] + r[i]*q12), i.e. the bit is embedded by adding q12.
+func encrypt(B []Fq, T []int8,r []int8, G []Fq, A []Fq, b []small) {
+	bG := make([]Fq, p)
+	bA := make([]Fq, p)
+	rqMultSmall(bG, G, b)
+	round(B,bG)
+	rqMultSmall(bA, A, b)
+	for i := 0; i < I; i++ {
+		T[i] = top(fqFreeze(int32(bA[i]) + int32(r[i])*q12))
+	}
+// r = decrypt((B,T),a)
+// decrypt recovers the I input bits: bit i is derived from the sign of
+// freeze(right(T[i]) - (a*B)[i] + 4*w + 1) through
+// internal.Int16NegativeMask (presumably -1 for negative values, giving a
+// bit of 1, and 0 otherwise).
+func decrypt(r []int8, B []Fq, T []int8, a []small) {
+	aB := make([]Fq, p)
+	rqMultSmall(aB, B, a)
+	for i := 0; i < I; i++ {
+		r[i] = int8(-internal.Int16NegativeMask(int16(fqFreeze(int32(right(T[i])) - int32(aB[i]) + 4*w + 1))))
+	}
+// Encoding I-bit inputs
+// Inputs holds the I input bits, one bit (0 or 1) per element.
+type Inputs [I]int8
+// inputsEncode packs the I input bits of r into the first inputsBytes
+// bytes of s: bit i of the input lands in bit (i & 7) of byte i>>3.
+//
+// The destination bytes are cleared first, because the bits are OR-ed in;
+// without the clear, a buffer that is not all zero would yield a wrong
+// encoding.
+func inputsEncode(s []byte, r Inputs) {
+	for i := 0; i < inputsBytes; i++ {
+		s[i] = 0
+	}
+	for i := 0; i < I; i++ {
+		s[i>>3] |= byte(r[i] << (i & 7))
+	}
+// expand fills L with pseudorandom 32-bit words derived from the key k.
+// It encrypts len(L) zero bytes with AES in CTR mode under k[:32] (a
+// 32-byte key) and an all-zero initial counter block, then combines every
+// four keystream bytes, little-endian, into one word. Only the first p
+// entries of L hold assembled words afterwards; callers pass a slice of
+// 4*p entries that doubles as scratch space for the raw bytes.
+func expand(L []uint32, k []byte) {
+	temp := make([]byte, len(L))             // plaintext to be encrypted. Should be of the same size as L (4*P)
+	// The first aes.BlockSize bytes stay zero and serve as the counter block.
+	ciphertext := make([]byte, aes.BlockSize+len(temp))
+	block, err := aes.NewCipher(k[:32])
+	if err != nil {
+		panic(err)
+	}
+	stream := cipher.NewCTR(block, ciphertext[:aes.BlockSize])
+	stream.XORKeyStream(ciphertext[aes.BlockSize:], temp)
+	ciphertext=ciphertext[aes.BlockSize:]
+	// convert byte to uint32
+	for i := 0; i < len(temp); i++ {
+		L[i] = uint32(ciphertext[i])
+	}
+	// Assemble words in place; entry i is written only after entries
+	// 4*i..4*i+3 have been read.
+	for i := 0; i < p; i++ {
+		var L0 uint32 = L[4*i]
+		var L1 uint32 = L[4*i+1]
+		var L2 uint32 = L[4*i+2]
+		var L3 uint32 = L[4*i+3]
+		L[i] = L0 + (L1 << 8) + (L2 << 16) + (L3 << 24)
+	}
+// generator, hashShort
+// G = generator(k)
+//
+// generator expands the key k into p words and reduces each modulo q
+// (via internal.Uint32ModUint14), shifted by q12, to obtain the
+// coefficients of G.
+func generator(G []Fq, k []byte) {
+	L := make([]uint32, 4*p)
+	expand(L, k)
+	for i := 0; i < p; i++ {
+		G[i] = Fq(internal.Uint32ModUint14(L[i], q) - q12)
+	}
+// out = hashShort(r)
+// hashShort deterministically maps the input bits r to a short polynomial:
+// it packs r into bytes, computes Hash5 of them, expands that hash into p
+// words and feeds the words to shortFromList.
+func hashShort(out []small,r Inputs) {
+	s := make([]byte, inputsBytes)
+	inputsEncode(s, r)
+	h := make([]byte, hashBytes)
+	L := make([]uint32, 4*p)
+	L_int32 := make([]int32, p)
+	hashPrefix(h, 5, s, len(s))
+	expand(L, h)
+	// convert []uint32 to []int32
+	for i := 0; i < p; i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out,L_int32)
+// NTRU LPRime expand
+// (S,A),a = xKeyGen()
+//
+// xKeyGen copies the first seedBytes of seed into S, derives G from S and
+// runs keyGen with the remainder of seed.
+func xKeyGen(S []byte, A []Fq, a []small, seed []byte) {
+	copy(S, seed[:seedBytes])
+	seed = seed[seedBytes:]
+	G := make([]Fq, p)
+	generator(G,S)
+	keyGen(A, a, G, seed)
+// B,T = xEncrypt(r,(S,A))
+// xEncrypt regenerates G from S, derives the short polynomial b from the
+// input bits via hashShort and encrypts r under (G, A).
+// r must not hold more than I elements, since it is copied into an Inputs.
+func xEncrypt(B []Fq, T []int8, r []int8, S []byte, A []Fq) {
+	G := make([]Fq, p)
+	generator(G,S)
+	b := make([]small, p)
+	// convert []int8 to Inputs
+	var r_inputs Inputs
+	for i := 0; i < len(r); i++ {
+		r_inputs[i] = r[i]
+	}
+	hashShort(b, r_inputs)
+	encrypt(B, T, r, G, A, b)
+// Encoding small polynomials (including short polynomials)
+// smallEncode transforms a polynomial in R to bytes: each coefficient is
+// stored as coefficient+1 in two bits, four coefficients per byte (lowest
+// bits first), and the last coefficient goes alone into a final byte.
+// It writes p/4 + 1 bytes to s.
+// these are the only functions that rely on p mod 4 = 1 */
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode transforms bytes into a polynomial in R, inverting
+// smallEncode: every byte yields four coefficients (two bits each, minus
+// one), and the low two bits of the final byte yield the last coefficient.
+// It reads p/4 + 1 bytes from s. A two-bit field of 3 decodes to 2; no
+// range check is performed here.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding rounded polynomials
+// roundedEncode transforms a rounded polynomial to bytes. Every coefficient
+// is shifted by q12 and multiplied by 10923 then shifted right by 15
+// (10923/2^15 is approximately 1/3), and the results are handed to
+// internal.Encode with modulus (q+2)/3 for each position.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode transforms bytes to a rounded polynomial, inverting
+// roundedEncode: internal.Decode is run with modulus (q+2)/3 for each
+// position, and every value is multiplied by 3 and shifted back by q12.
+// The arithmetic is done in uint16 and then reinterpreted as Fq.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Encoding top polynomials
+// topEncode packs two top values per byte: T[2*i] in the low nibble and
+// T[2*i+1] in the high nibble. It writes topBytes bytes to s.
+func topEncode(s []byte,T []int8)  {
+	for i := 0; i < topBytes; i++ {
+		s[i] = byte(T[2*i] + (T[2*i+1] << 4))
+	}
+// topDecode inverts topEncode: the low nibble of s[i] becomes T[2*i] and
+// the high nibble becomes T[2*i+1]. It reads topBytes bytes from s.
+func topDecode(T []int8, s []byte) {
+	for i := 0; i < topBytes; i++ {
+		T[2*i] = int8(s[i] & 15)
+		T[2*i+1] = int8(s[i] >> 4)
+	}
+// NTRU LPRime Core plus encoding
+// inputsRandom unpacks the first I bits of seed into r, one bit (0 or 1)
+// per element; bit i is taken from bit (i & 7) of byte i>>3.
+func inputsRandom(r *Inputs, seed []byte) {
+	for i := 0; i < I; i++ {
+		r[i] = int8(1 & (seed[i>>3] >> (i & 7)))
+	}
+// Generates public key and private key
+// pk,sk = zKeyGen()
+//
+// pk receives the generator seed S (seedBytes) followed by the
+// rounded-encoded A; sk receives the packed short secret a.
+func zKeyGen(pk []byte, sk []byte, seed []byte) {
+	A := make([]Fq, p)
+	a := make([]small, p)
+	xKeyGen(pk, A, a, seed)
+	pk = pk[seedBytes:]
+	roundedEncode(pk, A)
+	smallEncode(sk, a)
+// c = zEncrypt(r,pk)
+// zEncrypt decodes A from pk (after the seedBytes-long generator seed),
+// encrypts r and writes the rounded-encoded B followed by the packed top
+// values T to c.
+func zEncrypt(c []byte, r Inputs, pk []byte) {
+	A := make([]Fq, p)
+	B:=make([]Fq,p)
+	T := make([]int8, I)
+	roundedDecode(A, pk[seedBytes:])
+	xEncrypt(B,T,r[:], pk[:seedBytes], A)
+	roundedEncode(c, B)
+	c = c[roundedBytes:]
+	topEncode(c,T)
+// r = zDecrypt(C,sk)
+// zDecrypt unpacks the short secret a from sk, decodes B and the top
+// values T from c and stores the recovered input bits in r.
+func zDecrypt(r *Inputs, c []byte, sk []byte) {
+	a := make([]small, p)
+	B := make([]Fq, p)
+	T := make([]int8, I)
+	smallDecode(a, sk)
+	roundedDecode(B, c)
+	topDecode(T, c[roundedBytes:])
+	decrypt(r[:], B, T, a)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+//
+// h = Hash2(r || cache), where r is the packed input (inputsBytes bytes).
+// The pk argument itself is not read; only cache is.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, inputsBytes+hashBytes)
+	copy(x, r)
+	copy(x[inputsBytes:], cache)
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+//
+// k = Hash_b(y || z), where y is the packed input (inputsBytes bytes) and
+// z is the ciphertext followed by its confirmation hash.
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, inputsBytes+ciphertextsBytes+confirmBytes)
+	copy(x[:inputsBytes], y)
+	copy(x[inputsBytes:], z)
+	hashPrefix(k, b, x, len(x))
+// kemKeyGen generates a KEM key pair into pk and sk.
+// pk,sk = kemKeyGen()
+//
+// seed must be KeySeedSize bytes long; if it is nil, a fresh seed is read
+// from crypto/rand. The first seedBytes+p*4 bytes of the seed are passed to
+// zKeyGen and (up to inputsBytes bytes of) the remainder is stored in sk.
+//
+// sk is laid out as: secret key (secretKeysBytes) || pk (publicKeysBytes) ||
+// seed remainder (inputsBytes) || hashPrefix(4, pk).
+//
+// Panics if seed has the wrong length or if the system random source fails.
+func kemKeyGen(pk []byte, sk []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, KeySeedSize)
+		// A failed read would leave the seed unfilled and the key
+		// predictable, so the error must not be ignored.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != KeySeedSize {
+		panic("seed must be of length KeySeedSize")
+	}
+	zKeyGen(pk, sk, seed[:seedBytes+p*4])
+	seed = seed[seedBytes+p*4:]
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	copy(sk[:inputsBytes], seed)
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide encodes r, encrypts it and appends the confirmation hash.
+// c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+// r_enc receives inputsEncode(r); c receives the zEncrypt output followed,
+// at offset ciphertextsBytes, by hashConfirm(r_enc, pk, cache).
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	inputsEncode(r_enc, r)
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo generates a shared key and a ciphertext for the public key.
+// c,k = encap(pk)
+//
+// The ciphertext is written to c and the shared key to k. If seed is nil,
+// EncapsulationSeedSize random bytes are read from crypto/rand; otherwise
+// the given seed determines the result.
+//
+// Panics if seed, c or k does not have length EncapsulationSeedSize,
+// CiphertextSize or SharedKeySize respectively, or if the system random
+// source fails.
+func (pk PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		// Never encapsulate with an unfilled seed if the RNG read fails.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	var r Inputs
+	// cache = hashPrefix(4, pk); hide uses it for the confirmation hash.
+	hashPrefix(cache, 4, pk.pk[:], publicKeysBytes)
+	inputsRandom(&r, seed)
+	hide(c, r_enc, r, pk.pk[:], cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertextsDiffMask compares the first ciphertextsBytes+confirmBytes bytes
+// of c and c2 without branching on their contents.
+// Returns 0 if matching ciphertext+confirm, else -1
+func ciphertextsDiffMask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	n := ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is in 0..255. The final subtraction is done in int:
+	// in uint16 it would wrap to 65535 instead of yielding -1.
+	return int(1&((uint32(differentbits)-1)>>8)) - 1
+// DecapsulateTo computes the shared key for ciphertext ct and writes it to ss.
+// k = decap(c,sk)
+// Panics if ct is not CiphertextSize bytes or ss is not SharedKeySize bytes.
+func (priv *PrivateKey) DecapsulateTo(ss []byte, ct []byte) {
+	if len(ct) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(ss) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	sk := priv.sk[:]
+	// Views into the private key: secret key || pk || rho || cache.
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	// Decrypt, then re-encrypt the result so it can be compared with ct.
+	zDecrypt(&r, ct, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertextsDiffMask(ct, cnew)
+	// mask is 0 when ct matches cnew, so r_enc is kept; otherwise the masked
+	// XOR replaces r_enc[i] with rho[i] without branching on the outcome.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	hashSession(ss, 1+mask, r_enc, ct)
+// PrivateKey holds a packed private key.
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey holds a packed public key.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme is the stateless implementation of kem.Scheme for this instance.
+type scheme struct{}
+// sch is the single shared scheme value returned by every Scheme accessor.
+var sch kem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+// The accessors below report the instance name (filled in by the template)
+// and the size constants of this package.
+func (*scheme) Name() string               { return "{{.Pkg}}" }
+func (*scheme) PublicKeySize() int         { return PublicKeySize }
+func (*scheme) PrivateKeySize() int        { return PrivateKeySize }
+func (*scheme) SeedSize() int              { return KeySeedSize }
+func (*scheme) SharedKeySize() int         { return SharedKeySize }
+func (*scheme) CiphertextSize() int        { return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int { return EncapsulationSeedSize }
+// Scheme returns the scheme the key belongs to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+// MarshalBinary returns a freshly allocated copy of the packed private key.
+// The returned error is always nil.
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	out := make([]byte, PrivateKeySize)
+	copy(out, sk.sk[:])
+	return out, nil
+// Equal reports whether other is a *PrivateKey of this package holding the
+// same packed bytes.
+// NOTE(review): bytes.Equal is not a constant-time comparison.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	if oth, ok := other.(*PrivateKey); ok {
+		return bytes.Equal(sk.sk[:], oth.sk[:])
+	}
+	return false
+// Equal reports whether other is a *PublicKey of this package holding the
+// same packed bytes.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	if oth, ok := other.(*PublicKey); ok {
+		return bytes.Equal(pk.pk[:], oth.pk[:])
+	}
+	return false
+// Public returns a new PublicKey copied out of the public-key segment that
+// is stored inside the private key, right after the secret-key segment.
+func (sk *PrivateKey) Public() kem.PublicKey {
+	pub := new(PublicKey)
+	copy(pub.pk[:], sk.sk[secretKeysBytes:secretKeysBytes+publicKeysBytes])
+	return pub
+// MarshalBinary returns a freshly allocated copy of the packed public key.
+// The returned error is always nil.
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	out := make([]byte, PublicKeySize)
+	copy(out, pk.pk[:])
+	return out, nil
+// GenerateKeyPair creates a new key pair; passing a nil seed makes kemKeyGen
+// draw its own randomness. The returned error is always nil.
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	pub, priv := new(PublicKey), new(PrivateKey)
+	kemKeyGen(pub.pk[:], priv.sk[:], nil)
+	return pub, priv, nil
+// DeriveKeyPair derives a key pair from seed by handing it to kemKeyGen,
+// which panics when a non-nil seed is not KeySeedSize bytes long.
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	pub, priv := new(PublicKey), new(PrivateKey)
+	kemKeyGen(pub.pk[:], priv.sk[:], seed)
+	return pub, priv
+// Encapsulate produces a ciphertext and shared key for pk using fresh
+// randomness. It returns kem.ErrTypeMismatch if pk is not a *PublicKey of
+// this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically produces the ciphertext and shared key that
+// seed determines for pk. It returns kem.ErrSeedSize if seed is not
+// EncapsulationSeedSize bytes long and kem.ErrTypeMismatch if pk is not a
+// *PublicKey of this package; the seed is checked first.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	if len(seed) != EncapsulationSeedSize {
+		return nil, nil, kem.ErrSeedSize
+	}
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate returns the shared key for ct under sk. It returns
+// kem.ErrTypeMismatch if sk is not a *PrivateKey of this package and
+// kem.ErrCiphertextSize if ct is not CiphertextSize bytes long; the key type
+// is checked first.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	shared := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(shared, ct)
+	return shared, nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize if buf is not PublicKeySize bytes long.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pub := new(PublicKey)
+	copy(pub.pk[:], buf)
+	return pub, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize if buf is not PrivateKeySize bytes long.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	priv := new(PrivateKey)
+	copy(priv.sk[:], buf)
+	return priv, nil
diff --git a/kem/ntruprime/templates/sntrup.templ.go b/kem/ntruprime/templates/sntrup.templ.go
new file mode 100644
index 000000000..71cc85a10
--- /dev/null
+++ b/kem/ntruprime/templates/sntrup.templ.go
@@ -0,0 +1,981 @@
+// +build ignore
+// The previous line (and this one up to the warning below) is removed by the
+// template generator.
+// Code generated from sntrup.templ.go. DO NOT EDIT.
+// Package {{.Pkg}} implements the IND-CCA2 secure key encapsulation mechanism
+// {{.Pkg}} as submitted to round 3 of the NIST PQC competition and
+// described in
+// https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
+package {{.Pkg}}
+import (
+	"bytes"
+	cryptoRand "crypto/rand"
+	"crypto/sha512"
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+	"github.com/cloudflare/circl/kem/ntruprime/internal"
+	sntrupKem "github.com/cloudflare/circl/pke/ntruprime/kem"
+	ntrup "github.com/cloudflare/circl/pke/ntruprime/{{.Pkg}}"
+	// small is a coefficient of a small polynomial.
+	small int8
+	// Fq is a coefficient modulo q.
+	Fq int16
+	// Inputs is a vector of p small coefficients.
+	Inputs [p]small
+// Parameters of this instance (taken from the ntrup package) and the byte
+// sizes derived from them.
+const (
+	p             = ntrup.P
+	q             = ntrup.Q
+	q12           = ((q - 1) / 2) // Fq values are kept in -q12..q12
+	roundedBytes = ntrup.RoundedBytes
+	rqBytes      = ntrup.RqBytes
+	w             = ntrup.W
+	hashBytes = 32 // bytes of hash output kept by hashPrefix
+	smallBytes = ((p + 3) / 4) // smallEncode packs four coefficients per byte
+	inputsBytes      = smallBytes
+	ciphertextsBytes = roundedBytes
+	secretKeysBytes  = (2 * smallBytes) // two small polynomials, see zKeyGen
+	publicKeysBytes  = rqBytes
+	confirmBytes = 32
+const (
+		// KeySeedSize is the size reported by SeedSize().
+		// Note that during keyGen a random small polynomial is drawn repeatedly
+		// until a valid one (whose reciprocal succeeds) is found, so the amount
+		// of randomness consumed depends on how often the reciprocal fails.
+		// This is why DeriveKeyPairFromGen, rather than a fixed-size seed, is
+		// used to derive a key pair deterministically.
+		KeySeedSize = 4*p + p*4 + inputsBytes
+		// EncapsulationSeedSize is the size of the seed for EncapsulateTo.
+		EncapsulationSeedSize = 4*p
+		// SharedKeySize is the size of the established shared key.
+		SharedKeySize = ntrup.SharedKeySize
+		// CiphertextSize is the size of the encapsulated shared key.
+		CiphertextSize = ntrup.CiphertextSize
+		// PublicKeySize is the size of a packed public key.
+		PublicKeySize = ntrup.PublicKeySize
+		// PrivateKeySize is the size of a packed private key.
+		PrivateKeySize = ntrup.PrivateKeySize
+// Arithmetic operations over GF(3)
+// A polynomial of R has all of its coefficients in (-1,0,1)
+// F3 is always represented as -1,0,1
+// so ZZ_fromF3 is a no-op
+//
+// f3Freeze maps x to ((x+1) mod 3) - 1, with the reduction delegated to
+// internal.Int32ModUint14.
+// x must not be close to top int16
+// NOTE(review): assumes Int32ModUint14 returns the non-negative remainder,
+// which puts the result in -1..1 - confirm.
+func f3Freeze(x int16) small {
+	return small(internal.Int32ModUint14(int32(x)+1, 3)) - 1
+// Arithmetic operations over GF(q)
+// Fq is always represented as -q12...q12,
+// so ZZ_fromFq is a no-op.
+//
+// fqFreeze maps x to ((x+q12) mod q) - q12, with the reduction delegated to
+// internal.Int32ModUint14.
+// x must not be close to top int32
+func fqFreeze(x int32) Fq {
+	return Fq(internal.Int32ModUint14(x+q12, q) - q12)
+// fqRecip calculates the reciprocal of a1 in Fq.
+// It multiplies a1 into the accumulator q-3 times, i.e. it returns
+// a1^(q-2), which is the inverse of a non-zero a1 when q is prime.
+// The number of iterations does not depend on a1.
+func fqRecip(a1 Fq) Fq {
+	var i int = 1
+	ai := a1
+	for i < (q - 2) {
+		ai = fqFreeze(int32(a1) * int32(ai))
+		i += 1
+	}
+	return ai
+// weightwMask returns 0 if the weight of r is equal to w,
+// otherwise returns -1.
+// The weight is the number of the first p entries whose low bit is set.
+func weightwMask(r []small) int {
+	var weight int = 0
+	for i := 0; i < p; i++ {
+		weight += int(r[i]) & 1
+	}
+	// returns -1 if non zero
+	// otherwise returns 0 if weight==w
+	return internal.Int16NonzeroMask(int16(weight - w))
+/* R3_fromR(R_fromRq(r)) */
+// r3FromRq writes f3Freeze of each of the p coefficients of r to out.
+func r3FromRq(out []small, r []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = small(f3Freeze(int16(r[i])))
+	}
+// r3Mult computes h = f*g in the ring R3.
+// It first builds the full product of degree up to 2p-2 with every
+// coefficient frozen to -1..1, then folds the high coefficients down using
+// x^p = x + 1, and finally copies the low p coefficients into h.
+func r3Mult(h []small, f []small, g []small) {
+	fg := make([]small, p+p-1)
+	var result small
+	var i, j int
+	// Product coefficients 0..p-1.
+	for i = 0; i < p; i++ {
+		result = 0
+		for j = 0; j <= i; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Product coefficients p..2p-2.
+	for i = p; i < p+p-1; i++ {
+		result = 0
+		for j = i - p + 1; j < p; j++ {
+			result = f3Freeze(int16(result + f[j]*g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Reduction: coefficient i (>= p) is added to positions i-p and i-p+1.
+	for i = p + p - 2; i >= p; i-- {
+		fg[i-p] = f3Freeze(int16(fg[i-p] + fg[i]))
+		fg[i-p+1] = f3Freeze(int16(fg[i-p+1] + fg[i]))
+	}
+	for i = 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// r3Recip calculates the reciprocal of the R3 polynomial in and writes it
+// to out.
+// Returns 0 if recip succeeded; else -1
+// The loop always runs 2*p-1 iterations and uses masks instead of branches
+// for the conditional swap.
+func r3Recip(out []small, in []small) int {
+	// out := make([]small, p)
+	f := make([]small, p+1)
+	g := make([]small, p+1)
+	v := make([]small, p+1)
+	r := make([]small, p+1)
+	var sign int
+	r[0] = 1
+	// f has coefficients 1, -1, -1 at indices 0, p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g holds the input coefficients in reversed order.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = in[i]
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position.
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		sign = int(-g[0] * f[0])
+		// swap combines the masks for -delta being negative and g[0] being
+		// non-zero; when set, delta is negated and (f,g), (v,r) are exchanged.
+		var swap int = int(internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0])))
+		delta ^= swap & int(delta^-delta)
+		delta += 1
+		for i := 0; i < p+1; i++ {
+			t := swap & int(f[i]^g[i])
+			f[i] ^= small(t)
+			g[i] ^= small(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= small(t)
+			r[i] ^= small(t)
+		}
+		for i := 0; i < p+1; i++ {
+			g[i] = f3Freeze(int16(int(g[i]) + sign*int(f[i])))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = f3Freeze(int16(int(r[i]) + sign*int(v[i])))
+		}
+		// Shift g down by one position.
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// The result is v in reversed order, multiplied by f[0].
+	sign = int(f[0])
+	for i := 0; i < p; i++ {
+		out[i] = small(sign * int(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// Polynomials mod q
+// rqMultSmall computes h = f*g in the ring Rq, where g is a small
+// polynomial. It builds the full product with every coefficient frozen by
+// fqFreeze, folds the high coefficients down using x^p = x + 1 and copies
+// the low p coefficients into h.
+func rqMultSmall(h []Fq, f []Fq, g []small) {
+	fg := make([]Fq, p+p-1)
+	var result Fq
+	// Product coefficients 0..p-1.
+	for i := 0; i < p; i++ {
+		result = 0
+		for j := 0; j <= i; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Product coefficients p..2p-2.
+	for i := p; i < p+p-1; i++ {
+		result = 0
+		for j := i - p + 1; j < p; j++ {
+			result = fqFreeze(int32(result) + int32(f[j])*(int32)(g[i-j]))
+		}
+		fg[i] = result
+	}
+	// Reduction: coefficient i (>= p) is added to positions i-p and i-p+1.
+	// The additions happen in Fq before widening to int32.
+	for i := p + p - 2; i >= p; i-- {
+		fg[i-p] = fqFreeze(int32(fg[i-p] + fg[i]))
+		fg[i-p+1] = fqFreeze(int32(fg[i-p+1] + fg[i]))
+	}
+	for i := 0; i < p; i++ {
+		h[i] = fg[i]
+	}
+// rqMult3 computes h = 3f in Rq, coefficient by coefficient.
+// The product 3*f[i] is formed in Fq before it is widened and frozen.
+func rqMult3(h []Fq, f []Fq) {
+	for i := 0; i < p; i++ {
+		h[i] = fqFreeze(int32(3 * f[i]))
+	}
+// rqRecip3 computes out = 1/(3*in) in Rq.
+// Returns 0 if recip succeeded; else -1
+// The loop always runs 2*p-1 iterations and uses masks instead of branches
+// for the conditional swap.
+func rqRecip3(out []Fq, in []small) int {
+	f := make([]Fq, p+1)
+	g := make([]Fq, p+1)
+	v := make([]Fq, p+1)
+	r := make([]Fq, p+1)
+	var swap, t int
+	var f0, g0 int32
+	// Starting r at 1/3 is what folds the factor 3 into the result.
+	r[0] = fqRecip(3)
+	// f has coefficients 1, -1, -1 at indices 0, p-1 and p.
+	f[0] = 1
+	f[p-1] = -1
+	f[p] = -1
+	// g holds the input coefficients in reversed order.
+	for i := 0; i < p; i++ {
+		g[p-1-i] = Fq(in[i])
+	}
+	g[p] = 0
+	delta := 1
+	for loop := 0; loop < 2*p-1; loop++ {
+		// Shift v up by one position.
+		for i := p; i > 0; i-- {
+			v[i] = v[i-1]
+		}
+		v[0] = 0
+		// swap combines the masks for -delta being negative and g[0] being
+		// non-zero; when set, delta is negated and (f,g), (v,r) are exchanged.
+		swap = internal.Int16NegativeMask(int16(-delta)) & internal.Int16NonzeroMask(int16(g[0]))
+		delta ^= swap & (delta ^ -delta)
+		delta += 1
+		for i := 0; i < p+1; i++ {
+			t = swap & int(f[i]^g[i])
+			f[i] ^= Fq(t)
+			g[i] ^= Fq(t)
+			t = swap & int(v[i]^r[i])
+			v[i] ^= Fq(t)
+			r[i] ^= Fq(t)
+		}
+		f0 = int32(f[0])
+		g0 = int32(g[0])
+		for i := 0; i < p+1; i++ {
+			g[i] = fqFreeze(f0*int32(g[i]) - g0*int32(f[i]))
+		}
+		for i := 0; i < p+1; i++ {
+			r[i] = fqFreeze(f0*int32(r[i]) - g0*int32(v[i]))
+		}
+		// Shift g down by one position.
+		for i := 0; i < p; i++ {
+			g[i] = g[i+1]
+		}
+		g[p] = 0
+	}
+	// The result is v in reversed order, scaled by 1/f[0].
+	scale := Fq(fqRecip(f[0]))
+	for i := 0; i < p; i++ {
+		out[i] = fqFreeze(int32(scale) * (int32)(v[p-1-i]))
+	}
+	return internal.Int16NonzeroMask(int16(delta))
+// Rounded polynomials mod q
+// round rounds all coefficients of a polynomial to the nearest multiple
+// of 3 by subtracting f3Freeze(a[i]) from a[i].
+func round(out []Fq, a []Fq) {
+	for i := 0; i < p; i++ {
+		out[i] = a[i] - Fq(f3Freeze(int16(a[i])))
+	}
+// minmax stores min(*x, *y) in *x and max(*x, *y) in *y; nothing is
+// returned. It executes in constant time: the exchange is done with a mask
+// rather than a branch.
+func minmax(x , y *uint32) {
+	var xi uint32 = *x
+	var yi uint32 = *y
+	var xy uint32 = xi ^ yi
+	var c uint32 = yi - xi
+	// Fix up the top bit of the difference when the top bits of xi and yi
+	// differ.
+	c ^= xy & (c ^ yi ^ 0x80000000)
+	// Turn the top bit into an all-zero or all-one mask.
+	c >>= 31
+	c = -c
+	// c is now xi^yi if the values must be exchanged, else 0.
+	c &= xy
+	*x = xi ^ c
+	*y = yi ^ c
+// cryptoSortUint32 sorts the first n elements of x in place.
+// Which pairs are passed to minmax depends only on n, never on the values
+// stored in x.
+// Note that the loop variables p and q below shadow the package-level
+// parameters of the same name.
+func cryptoSortUint32(x []uint32, n int) {
+	if n < 2 {
+		return
+	}
+	// top becomes the smallest power of two with top >= n-top.
+	top := 1
+	for top < n-top {
+		top += top
+	}
+	for p := top; p > 0; p >>= 1 {
+		for i := 0; i < n-p; i++ {
+			if i&p == 0 {
+				minmax(&x[i], &x[i+p])
+			}
+		}
+		for q := top; q > p; q >>= 1 {
+			for i := 0; i < n-q; i++ {
+				if i&p == 0 {
+					minmax(&x[i+p], &x[i+q])
+				}
+			}
+		}
+	}
+// shortFromList derives a short polynomial from the p values of in by
+// sorting. The first w values get their low bit cleared (low two bits 0 or
+// 2) and the remaining ones get low two bits 1; after sorting, (L[i] & 3) - 1
+// yields -1 or 1 for the former and 0 for the latter.
+func shortFromList(out []small, in []int32) {
+	L := make([]uint32, p)
+	var neg2, neg3 int = -2, -3
+	for i := 0; i < w; i++ {
+		L[i] = uint32(in[i]) & uint32((neg2))
+	}
+	for i := w; i < p; i++ {
+		L[i] = (uint32(in[i]) & uint32((neg3))) | 1
+	}
+	cryptoSortUint32(L, p)
+	for i := 0; i < p; i++ {
+		// The subtraction wraps in uint32; converting to small gives -1.
+		out[i] = small((L[i] & 3) - 1)
+	}
+//  Underlying hash function
+// hashPrefix prepends the byte b to the first inlen bytes of in, computes
+// the SHA-512 hash of the result and copies the first 32 bytes of the hash
+// to out.
+// e.g., b = 0 means out = Hash0(in)
+// Only the low 8 bits of b are used. If in is shorter than inlen, the
+// missing bytes are hashed as zeros.
+func hashPrefix(out []byte, b int, in []byte, inlen int) {
+	x := make([]byte, inlen+1)
+	x[0] = byte(b)
+	copy(x[1:], in)
+	h := sha512.Sum512(x)
+	copy(out, h[:32])
+// Higher level randomness
+// urandom32 returns the unsigned 32-bit integer formed from seed[0..3] in
+// little-endian byte order. The randomness comes entirely from seed.
+func urandom32(seed []byte) uint32 {
+	var out [4]uint32
+	out[0] = uint32(seed[0])
+	out[1] = uint32(seed[1]) << 8
+	out[2] = uint32(seed[2]) << 16
+	out[3] = uint32(seed[3]) << 24
+	return out[0] + out[1] + out[2] + out[3]
+// shortRandom generates a short polynomial from seed.
+// It reads p 32-bit words (4*p bytes) from seed with urandom32 and passes
+// them to shortFromList.
+func shortRandom(out []small, seed []byte) {
+	L := make([]uint32, p)
+	for i := 0; i < p; i++ {
+		L[i] = urandom32(seed[4*i : 4*i+4])
+	}
+	// Converts uint32 array to int32 array
+	L_int32 := make([]int32, p)
+	for i := 0; i < len(L); i++ {
+		L_int32[i] = int32(L[i])
+	}
+	shortFromList(out, L_int32)
+// smallRandom generates a list of p small values from seed.
+// Each value uses one 32-bit word of seed: its low 30 bits are multiplied
+// by 3 and shifted right by 30, giving 0..2, and 1 is subtracted.
+func smallRandom(out []small, seed []byte){
+	for i := 0; i < p; i++ {
+		out[i] = small(((urandom32(seed[4*i:4*i+4])&0x3fffffff)*3)>>30) - 1
+	}
+// Streamlined NTRU Prime Core
+//  h,(f,ginv) = keyGen()
+//
+// keyGen draws small polynomials g until r3Recip succeeds (storing the
+// reciprocal in ginv), derives a short polynomial f with shortRandom and
+// sets h = g * 1/(3f) in Rq.
+//
+// If gen is nil the randomness is read from crypto/rand; otherwise it is
+// drawn from gen four bytes at a time. Panics if crypto/rand fails.
+func keyGen(h []Fq, f []small, ginv []small, gen *nist.DRBG) {
+	g := make([]small, p)
+	// First half of seed: randomness for g; second half: randomness for f.
+	seed := make([]byte, 4*p+4*p)
+	if gen == nil {
+		for {
+			// An ignored read error would leave the buffer unfilled and
+			// the key predictable.
+			if _, err := cryptoRand.Read(seed[:4*p]); err != nil {
+				panic(err)
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		if _, err := cryptoRand.Read(seed[4*p:]); err != nil {
+			panic(err)
+		}
+	} else {
+		for {
+			for i := 0; i < p; i++ {
+				gen.Fill(seed[4*i : 4*i+4])
+			}
+			smallRandom(g, seed[:4*p])
+			if r3Recip(ginv, g) == 0 {
+				break
+			}
+		}
+		for i := 0; i < p; i++ {
+			gen.Fill(seed[4*p+4*i : 4*p+4*i+4])
+		}
+	}
+	shortRandom(f, seed[4*p:])
+	finv := make([]Fq, p)
+	rqRecip3(finv, f) /* always works */
+	rqMultSmall(h, finv, g)
+// encrypt computes c = encrypt(r,h): the product h*r in Rq with every
+// coefficient rounded by round.
+func encrypt(c []Fq, r []small, h []Fq) {
+	hr := make([]Fq, p)
+	rqMultSmall(hr, h, r)
+	round(c, hr)
+// decrypt computes r = decrypt(c,(f,ginv)).
+// It forms e = 3*c*f reduced to R3 and ev = e*ginv. If ev has weight w the
+// result is ev; otherwise r is set to w ones followed by zeros. The
+// selection uses a mask instead of a branch.
+func decrypt(r []small, c []Fq, f []small, ginv []small) {
+	cf := make([]Fq, p)
+	cf3 := make([]Fq, p)
+	e := make([]small, p)
+	ev := make([]small, p)
+	rqMultSmall(cf, c, f)
+	rqMult3(cf3, cf)
+	r3FromRq(e, cf3)
+	r3Mult(ev, e, ginv)
+	mask := weightwMask(ev) /* 0 if weight w, else -1 */
+	// With mask == 0 this yields ev[i]; with mask == -1 it yields 1.
+	for i := 0; i < w; i++ {
+		r[i] = ((ev[i] ^ 1) & small(^mask)) ^ 1
+	}
+	// With mask == 0 this yields ev[i]; with mask == -1 it yields 0.
+	for i := w; i < p; i++ {
+		r[i] = ev[i] & small(^mask)
+	}
+// Encoding small polynomials (including short polynomials)
+// smallEncode transforms a polynomial in R to bytes: each coefficient plus
+// one is stored in two bits, four coefficients per byte starting at the low
+// bits, and the last coefficient gets a byte of its own.
+// smallEncode and smallDecode are the only functions that rely on
+// p mod 4 = 1.
+func smallEncode(s []byte, f []small) {
+	var x small
+	var index int = 0
+	for i := 0; i < p/4; i++ {
+		x = f[index] + 1
+		index++
+		x += (f[index] + 1) << 2
+		index++
+		x += (f[index] + 1) << 4
+		index++
+		x += (f[index] + 1) << 6
+		index++
+		s[0] = byte(x)
+		s = s[1:]
+	}
+	// Remaining coefficient.
+	x = f[index] + 1
+	s[0] = byte(x)
+// smallDecode transforms bytes into a polynomial in R: every byte yields
+// four coefficients, each a two-bit field minus one, starting at the low
+// bits; the last byte yields only one coefficient.
+func smallDecode(f []small, s []byte) {
+	var index int = 0
+	var x byte
+	for i := 0; i < p/4; i++ {
+		x = s[0]
+		s = s[1:]
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		x >>= 2
+		index++
+		f[index] = ((small)(x & 3)) - 1
+		index++
+	}
+	// Remaining coefficient.
+	x = s[0]
+	f[index] = ((small)(x & 3)) - 1
+// Encoding general polynomials
+// rqEncode transforms a polynomial in R/q to bytes: each coefficient is
+// shifted by q12 and passed to internal.Encode with modulus q.
+func rqEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16(r[i] + q12)
+		M[i] = q
+	}
+	internal.Encode(s, R, M, p)
+// rqDecode transforms bytes into a polynomial in R/q: the values decoded by
+// internal.Decode with modulus q are shifted back by q12.
+func rqDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = q
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		r[i] = ((Fq)(R[i])) - q12
+	}
+// Encoding rounded polynomials
+// roundedEncode transforms a rounded polynomial to bytes: each coefficient
+// is shifted by q12, multiplied by 10923 and shifted right by 15 bits, and
+// the result is passed to internal.Encode with modulus (q+2)/3.
+// NOTE(review): 10923/2^15 is approximately 1/3, so this presumably divides
+// the shifted coefficient by 3 - confirm.
+func roundedEncode(s []byte, r []Fq) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		R[i] = uint16((int32((r[i])+q12) * 10923) >> 15)
+		M[i] = (q + 2) / 3
+	}
+	internal.Encode(s, R, M, p)
+// roundedDecode transforms bytes into a rounded polynomial: the values
+// decoded by internal.Decode with modulus (q+2)/3 are mapped to
+// 3*R[i] - q12.
+func roundedDecode(r []Fq, s []byte) {
+	R := make([]uint16, p)
+	M := make([]uint16, p)
+	for i := 0; i < p; i++ {
+		M[i] = (q + 2) / 3
+	}
+	internal.Decode(R, s, M, p)
+	for i := 0; i < p; i++ {
+		// Computed in uint16 (may wrap) and then converted to the signed Fq.
+		r[i] = Fq(R[i]*3 - q12)
+	}
+// Streamlined NTRU Prime Core plus encoding
+// zKeyGen generates a public key and a private key in encoded form.
+// pk,sk = zKeyGen()
+// pk receives rqEncode(h); sk receives smallEncode(f) followed, at offset
+// smallBytes, by smallEncode(v), where v is the ginv output of keyGen.
+func zKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	h := make([]Fq, p)
+	f := make([]small, p)
+	v := make([]small, p)
+	keyGen(h, f, v, gen)
+	rqEncode(pk, h)
+	smallEncode(sk, f)
+	sk = sk[smallBytes:]
+	smallEncode(sk, v)
+// zEncrypt computes C = zEncrypt(r,pk): it decodes h from pk, encrypts r
+// with it and writes the rounded encoding of the result to C.
+func zEncrypt(C []byte, r Inputs, pk []byte) {
+	h := make([]Fq, p)
+	c := make([]Fq, p)
+	rqDecode(h, pk)
+	encrypt(c, r[:], h)
+	roundedEncode(C, c)
+// zDecrypt computes r = zDecrypt(C,sk): it decodes f from the start of sk
+// and v from offset smallBytes, decodes the ciphertext polynomial from C
+// and decrypts it into r.
+func zDecrypt(r *Inputs, C []byte, sk []byte) {
+	f := make([]small, p)
+	v := make([]small, p)
+	c := make([]Fq, p)
+	smallDecode(f, sk)
+	sk = sk[smallBytes:]
+	smallDecode(v, sk)
+	roundedDecode(c, C)
+	decrypt(r[:], c, f, v)
+// Confirmation hash
+// h = hashConfirm(r,pk,cache); cache is Hash4(pk)
+// The hash input is hashPrefix(3, r) over inputsBytes bytes of r, followed
+// by the first hashBytes bytes of cache; it is hashed with prefix 2.
+// The pk argument is not read here.
+func hashConfirm(h []byte, r []byte, pk []byte, cache []byte) {
+	x := make([]byte, hashBytes*2)
+	hashPrefix(x, 3, r, inputsBytes)
+	copy(x[hashBytes:],cache[:hashBytes])
+	hashPrefix(h, 2, x, len(x))
+// Session-key hash
+// k = hashSession(b,y,z)
+// The hash input is hashPrefix(3, y) over inputsBytes bytes of y, followed
+// by the first ciphertextsBytes+confirmBytes bytes of z; it is hashed with
+// prefix b.
+func hashSession(k []byte, b int, y []byte, z []byte) {
+	x := make([]byte, hashBytes+ciphertextsBytes+confirmBytes)
+	hashPrefix(x, 3, y, inputsBytes)
+	copy(x[hashBytes:],z[:ciphertextsBytes+confirmBytes])
+	hashPrefix(k, b, x, len(x))
+//  Streamlined NTRU Prime
+// kemKeyGen generates a KEM key pair into pk and sk.
+// pk,sk = kemKeyGen()
+//
+// If gen is non-nil the random bytes are drawn from it; otherwise
+// crypto/rand is used. sk is laid out as: secret key (secretKeysBytes) ||
+// pk (publicKeysBytes) || rho (inputsBytes random bytes) ||
+// hashPrefix(4, pk).
+//
+// Panics if reading rho from crypto/rand fails.
+func kemKeyGen(pk []byte, sk []byte, gen *nist.DRBG) {
+	zKeyGen(pk, sk, gen)
+	sk = sk[secretKeysBytes:]
+	copy(sk, pk)
+	sk = sk[publicKeysBytes:]
+	if gen != nil {
+		gen.Fill(sk[:inputsBytes])
+	} else if _, err := cryptoRand.Read(sk[:inputsBytes]); err != nil {
+		// rho replaces the session-key input when decapsulation rejects a
+		// ciphertext, so it must never be left unfilled.
+		panic(err)
+	}
+	sk = sk[inputsBytes:]
+	hashPrefix(sk, 4, pk, publicKeysBytes)
+// hide encodes r, encrypts it and appends the confirmation hash.
+//  c,r_enc = hide(r,pk,cache); cache is Hash4(pk)
+// r_enc receives smallEncode(r); c receives the zEncrypt output followed,
+// at offset ciphertextsBytes, by hashConfirm(r_enc, pk, cache).
+func hide(c []byte, r_enc []byte, r Inputs, pk []byte, cache []byte) {
+	smallEncode(r_enc, r[:])
+	zEncrypt(c, r, pk)
+	c = c[ciphertextsBytes:]
+	hashConfirm(c, r_enc, pk, cache)
+// EncapsulateTo generates a shared key and a ciphertext for the public key.
+// c,k = encap(pk)
+//
+// The ciphertext is written to c and the shared key to k. If seed is nil,
+// EncapsulationSeedSize random bytes are read from crypto/rand; otherwise
+// the given seed determines the result.
+//
+// Panics if seed, c or k does not have length EncapsulationSeedSize,
+// CiphertextSize or SharedKeySize respectively, or if the system random
+// source fails.
+func (pub PublicKey) EncapsulateTo(c []byte, k []byte, seed []byte) {
+	if seed == nil {
+		seed = make([]byte, EncapsulationSeedSize)
+		// Never encapsulate with an unfilled seed if the RNG read fails.
+		if _, err := cryptoRand.Read(seed); err != nil {
+			panic(err)
+		}
+	}
+	if len(seed) != EncapsulationSeedSize {
+		panic("seed must be of length EncapsulationSeedSize")
+	}
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	pk := pub.pk[:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cache := make([]byte, hashBytes)
+	// cache = hashPrefix(4, pk); hide uses it for the confirmation hash.
+	hashPrefix(cache, 4, pk, publicKeysBytes)
+	shortRandom(r[:], seed)
+	hide(c, r_enc, r, pk, cache)
+	hashSession(k, 1, r_enc, c)
+// ciphertexts_diff_mask compares the first ciphertextsBytes+confirmBytes
+// bytes of c and c2 without branching on their contents.
+// Returns 0 if matching ciphertext+confirm, else -1
+func ciphertexts_diff_mask(c []byte, c2 []byte) int {
+	var differentbits uint16
+	n := ciphertextsBytes + confirmBytes
+	for i := 0; i < n; i++ {
+		differentbits |= uint16(c[i] ^ c2[i])
+	}
+	// differentbits is in 0..255. The final subtraction is done in int:
+	// in uint16 it would wrap to 65535 instead of yielding -1.
+	return int(1&((uint32(differentbits)-1)>>8)) - 1
+// DecapsulateTo computes the shared key for ciphertext c and writes it to k.
+// k = decap(c,sk)
+// Panics if c is not CiphertextSize bytes or k is not SharedKeySize bytes.
+func (priv *PrivateKey) DecapsulateTo (k []byte, c []byte) {
+	if len(c) != CiphertextSize {
+		panic("ct must be of length CiphertextSize")
+	}
+	if len(k) != SharedKeySize {
+		panic("ss must be of length SharedKeySize")
+	}
+	sk:=priv.sk[:]
+	// Views into the private key: secret key || pk || rho || cache.
+	pk := sk[secretKeysBytes:]
+	rho := pk[publicKeysBytes:]
+	cache := rho[inputsBytes:]
+	var r Inputs
+	r_enc := make([]byte, inputsBytes)
+	cnew := make([]byte, ciphertextsBytes+confirmBytes)
+	// Decrypt, then re-encrypt the result so it can be compared with c.
+	zDecrypt(&r, c, sk)
+	hide(cnew, r_enc, r, pk, cache)
+	var mask int = ciphertexts_diff_mask(c, cnew)
+	// mask is 0 when c matches cnew, so r_enc is kept; otherwise the masked
+	// XOR replaces r_enc[i] with rho[i] without branching on the outcome.
+	for i := 0; i < inputsBytes; i++ {
+		r_enc[i] ^= byte(mask & int(r_enc[i]^rho[i]))
+	}
+	// hashPrefix keeps only the low byte of 1+mask: 1 on a match, else 0.
+	hashSession(k, 1+mask, r_enc, c)
+// PrivateKey holds a packed private key.
+// The structure of the private key is given by the following segments:
+// The secret key, the public key, entropy and the hash of the public key
+type PrivateKey struct {
+	sk [PrivateKeySize]byte
+// PublicKey holds a packed public key.
+type PublicKey struct {
+	pk [PublicKeySize]byte
+// scheme is the stateless implementation of the KEM interfaces for this
+// instance.
+type scheme struct{}
+// sch is the single shared scheme value returned by every Scheme accessor.
+var sch sntrupKem.Scheme = &scheme{}
+// Scheme returns a KEM interface.
+func Scheme() kem.Scheme { return sch }
+// SntrupScheme returns a sntrup.KEM interface
+func SntrupScheme() sntrupKem.Scheme { return sch }
+// The accessors below report the instance name (filled in by the template)
+// and the size constants of this package.
+func (*scheme) Name() string        		{ return "{{.Pkg}}" }
+func (*scheme) PublicKeySize() int  		{ return PublicKeySize }
+func (*scheme) PrivateKeySize() int			{ return PrivateKeySize }
+func (*scheme) SeedSize() int				{ return KeySeedSize }
+func (*scheme) SharedKeySize() int         	{ return SharedKeySize }
+func (*scheme) CiphertextSize() int        	{ return CiphertextSize }
+func (*scheme) EncapsulationSeedSize() int 	{ return EncapsulationSeedSize }
+// Scheme returns the scheme the key belongs to.
+func (sk *PrivateKey) Scheme() kem.Scheme { return sch }
+func (pk *PublicKey) Scheme() kem.Scheme  { return sch }
+// MarshalBinary returns a freshly allocated copy of the packed private key.
+// The returned error is always nil.
+func (sk *PrivateKey) MarshalBinary() ([]byte, error) {
+	out := make([]byte, PrivateKeySize)
+	copy(out, sk.sk[:])
+	return out, nil
+// Equal reports whether other is a *PrivateKey of this package holding the
+// same packed bytes.
+// NOTE(review): bytes.Equal is not a constant-time comparison.
+func (sk *PrivateKey) Equal(other kem.PrivateKey) bool {
+	if oth, ok := other.(*PrivateKey); ok {
+		return bytes.Equal(sk.sk[:], oth.sk[:])
+	}
+	return false
+// Equal reports whether other is a *PublicKey of this package holding the
+// same packed bytes.
+func (pk *PublicKey) Equal(other kem.PublicKey) bool {
+	if oth, ok := other.(*PublicKey); ok {
+		return bytes.Equal(pk.pk[:], oth.pk[:])
+	}
+	return false
+// Public returns a new PublicKey copied out of the public-key segment that
+// is stored inside the private key, right after the secret-key segment.
+func (sk *PrivateKey) Public() kem.PublicKey {
+	pub := new(PublicKey)
+	copy(pub.pk[:], sk.sk[secretKeysBytes:secretKeysBytes+publicKeysBytes])
+	return pub
+// MarshalBinary returns a freshly allocated copy of the packed public key.
+// The returned error is always nil.
+func (pk *PublicKey) MarshalBinary() ([]byte, error) {
+	out := make([]byte, PublicKeySize)
+	copy(out, pk.pk[:])
+	return out, nil
+// GenerateKeyPair creates a new key pair; passing a nil generator makes
+// kemKeyGen use crypto/rand. The returned error is always nil.
+func (*scheme) GenerateKeyPair() (kem.PublicKey, kem.PrivateKey, error) {
+	pub, priv := new(PublicKey), new(PrivateKey)
+	kemKeyGen(pub.pk[:], priv.sk[:], nil)
+	return pub, priv, nil
+// DeriveKeyPair is not implemented for this scheme: it ignores seed and
+// returns nil keys. DeriveKeyPairFromGen is the deterministic alternative.
+// NOTE(review): callers that use the result without a nil check will crash;
+// consider whether this should fail loudly instead.
+func (*scheme) DeriveKeyPair(seed []byte) (kem.PublicKey, kem.PrivateKey) {
+	return nil, nil
+// DeriveKeyPairFromGen generates a key pair whose randomness is drawn from
+// gen. Panics if gen is nil.
+func (*scheme) DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey) {
+	if gen == nil {
+		panic("A nist DRBG must be provided")
+	}
+	var pk [PublicKeySize]byte
+	var sk [PrivateKeySize]byte
+	kemKeyGen(pk[:], sk[:], gen)
+	return &PublicKey{pk: pk}, &PrivateKey{sk: sk}
+// Encapsulate produces a ciphertext and shared key for pk using fresh
+// randomness. It returns kem.ErrTypeMismatch if pk is not a *PublicKey of
+// this package.
+func (*scheme) Encapsulate(pk kem.PublicKey) (ct, ss []byte, err error) {
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	ct, ss = make([]byte, CiphertextSize), make([]byte, SharedKeySize)
+	pub.EncapsulateTo(ct, ss, nil)
+	return ct, ss, nil
+// EncapsulateDeterministically produces the ciphertext and shared key that
+// seed determines for pk. It returns kem.ErrSeedSize if seed is not
+// EncapsulationSeedSize bytes long and kem.ErrTypeMismatch if pk is not a
+// *PublicKey of this package.
+func (*scheme) EncapsulateDeterministically(pk kem.PublicKey, seed []byte) (ct, ss []byte, err error) {
+	// Reject bad seeds here: EncapsulateTo would panic on a wrong length and
+	// would silently switch to random bytes for a nil seed.
+	if len(seed) != EncapsulationSeedSize {
+		return nil, nil, kem.ErrSeedSize
+	}
+	ct = make([]byte, CiphertextSize)
+	ss = make([]byte, SharedKeySize)
+	pub, ok := pk.(*PublicKey)
+	if !ok {
+		return nil, nil, kem.ErrTypeMismatch
+	}
+	pub.EncapsulateTo(ct, ss, seed)
+	return ct, ss, nil
+// Decapsulate returns the shared key for ct under sk. It returns
+// kem.ErrTypeMismatch if sk is not a *PrivateKey of this package and
+// kem.ErrCiphertextSize if ct is not CiphertextSize bytes long; the key type
+// is checked first.
+func (*scheme) Decapsulate(sk kem.PrivateKey, ct []byte) ([]byte, error) {
+	priv, ok := sk.(*PrivateKey)
+	if !ok {
+		return nil, kem.ErrTypeMismatch
+	}
+	if len(ct) != CiphertextSize {
+		return nil, kem.ErrCiphertextSize
+	}
+	shared := make([]byte, SharedKeySize)
+	priv.DecapsulateTo(shared, ct)
+	return shared, nil
+// UnmarshalBinaryPublicKey copies buf into a new PublicKey. It returns
+// kem.ErrPubKeySize if buf is not PublicKeySize bytes long.
+func (*scheme) UnmarshalBinaryPublicKey(buf []byte) (kem.PublicKey, error) {
+	if len(buf) != PublicKeySize {
+		return nil, kem.ErrPubKeySize
+	}
+	pub := new(PublicKey)
+	copy(pub.pk[:], buf)
+	return pub, nil
+// UnmarshalBinaryPrivateKey copies buf into a new PrivateKey. It returns
+// kem.ErrPrivKeySize if buf is not PrivateKeySize bytes long.
+func (*scheme) UnmarshalBinaryPrivateKey(buf []byte) (kem.PrivateKey, error) {
+	if len(buf) != PrivateKeySize {
+		return nil, kem.ErrPrivKeySize
+	}
+	priv := new(PrivateKey)
+	copy(priv.sk[:], buf)
+	return priv, nil
diff --git a/kem/schemes/schemes.go b/kem/schemes/schemes.go
index a33e7b96e..15a3eb14f 100644
--- a/kem/schemes/schemes.go
+++ b/kem/schemes/schemes.go
@@ -14,6 +14,10 @@
 //	FrodoKEM-640-SHAKE
 //	Kyber512, Kyber768, Kyber1024
+//	Kyber512, Kyber768, Kyber1024
 package schemes
 import (
@@ -26,6 +30,18 @@ import (
+	"github.com/cloudflare/circl/kem/ntruprime/ntrulpr1013"
+	"github.com/cloudflare/circl/kem/ntruprime/ntrulpr1277"
+	"github.com/cloudflare/circl/kem/ntruprime/ntrulpr653"
+	"github.com/cloudflare/circl/kem/ntruprime/ntrulpr761"
+	"github.com/cloudflare/circl/kem/ntruprime/ntrulpr857"
+	"github.com/cloudflare/circl/kem/ntruprime/ntrulpr953"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup1013"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup1277"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup653"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup761"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup857"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup953"
 var allSchemes = [...]kem.Scheme{
@@ -43,6 +59,18 @@ var allSchemes = [...]kem.Scheme{
+	ntrulpr653.Scheme(),
+	ntrulpr761.Scheme(),
+	ntrulpr857.Scheme(),
+	ntrulpr953.Scheme(),
+	ntrulpr1013.Scheme(),
+	ntrulpr1277.Scheme(),
+	sntrup653.Scheme(),
+	sntrup761.Scheme(),
+	sntrup857.Scheme(),
+	sntrup953.Scheme(),
+	sntrup1013.Scheme(),
+	sntrup1277.Scheme(),
 var allSchemeNames map[string]kem.Scheme
diff --git a/kem/schemes/schemes_test.go b/kem/schemes/schemes_test.go
index e41840b34..528e85f0f 100644
--- a/kem/schemes/schemes_test.go
+++ b/kem/schemes/schemes_test.go
@@ -160,4 +160,16 @@ func Example_schemes() {
 	// Kyber768-X448
 	// Kyber1024-X448
 	// P256Kyber768Draft00
+	// ntrulpr653
+	// ntrulpr761
+	// ntrulpr857
+	// ntrulpr953
+	// ntrulpr1013
+	// ntrulpr1277
+	// sntrup653
+	// sntrup761
+	// sntrup857
+	// sntrup953
+	// sntrup1013
+	// sntrup1277
diff --git a/pke/ntruprime/gen.go b/pke/ntruprime/gen.go
new file mode 100644
index 000000000..a9d456dfc
--- /dev/null
+++ b/pke/ntruprime/gen.go
@@ -0,0 +1,115 @@
+package main
+import (
+	"bytes"
+	"go/format"
+	"io/ioutil"
+	"strings"
+	"text/template"
+// StreamlinedInstance holds the values substituted into
+// templates/sntrup.params.templ.go for one parameter set. Name selects the
+// output directory (see Pkg); the integer fields are referenced by the
+// template under the same names.
+type StreamlinedInstance struct {
+	Name                                                                                         string
+	P, Q, RoundedBytes, RqBytes, W, SharedKeySize, CiphertextSize, PublicKeySize, PrivateKeySize int
+// Pkg reports the directory that receives this instance's params.go: the
+// instance name converted to lower case.
+func (m StreamlinedInstance) Pkg() string {
+	dir := strings.ToLower(m.Name)
+	return dir
+// LPRInstance holds the values substituted into
+// templates/ntrulpr.params.templ.go for one parameter set. Name selects the
+// output directory (see Pkg). RqBytes is declared but is neither set by any
+// entry of LPRInstances nor referenced by the ntrulpr template.
+type LPRInstance struct {
+	Name                                                                                         string
+	P, Q, RoundedBytes, RqBytes, W, SharedKeySize, CiphertextSize, PublicKeySize, PrivateKeySize int
+	Tau0, Tau1, Tau2, Tau3                                                                       int
+// Pkg reports the directory that receives this instance's params.go: the
+// instance name converted to lower case.
+func (m LPRInstance) Pkg() string {
+	dir := strings.ToLower(m.Name)
+	return dir
+var (
+	// StreamlinedInstances lists every sntrup parameter set; one params.go is
+	// generated per entry by generatePackageFiles.
+	StreamlinedInstances = []StreamlinedInstance{
+		{Name: "SNTRUP761", P: 761, Q: 4591, RoundedBytes: 1007, RqBytes: 1158, W: 286, SharedKeySize: 32, CiphertextSize: 1039, PublicKeySize: 1158, PrivateKeySize: 1763},
+		{Name: "SNTRUP653", P: 653, Q: 4621, RoundedBytes: 865, RqBytes: 994, W: 288, SharedKeySize: 32, CiphertextSize: 897, PublicKeySize: 994, PrivateKeySize: 1518},
+		{Name: "SNTRUP857", P: 857, Q: 5167, RoundedBytes: 1152, RqBytes: 1322, W: 322, SharedKeySize: 32, CiphertextSize: 1184, PublicKeySize: 1322, PrivateKeySize: 1999},
+		{Name: "SNTRUP953", P: 953, Q: 6343, RoundedBytes: 1317, RqBytes: 1505, W: 396, SharedKeySize: 32, CiphertextSize: 1349, PublicKeySize: 1505, PrivateKeySize: 2254},
+		{Name: "SNTRUP1013", P: 1013, Q: 7177, RoundedBytes: 1423, RqBytes: 1623, W: 448, SharedKeySize: 32, CiphertextSize: 1455, PublicKeySize: 1623, PrivateKeySize: 2417},
+		{Name: "SNTRUP1277", P: 1277, Q: 7879, RoundedBytes: 1815, RqBytes: 2067, W: 492, SharedKeySize: 32, CiphertextSize: 1847, PublicKeySize: 2067, PrivateKeySize: 3059},
+	}
+	// LPRInstances lists every ntrulpr parameter set; one params.go is
+	// generated per entry by generateLPRFiles. RqBytes is left at zero here.
+	LPRInstances = []LPRInstance{
+		{Name: "NTRULPR653", P: 653, Q: 4621, RoundedBytes: 865, W: 252, Tau0: 2175, Tau1: 113, Tau2: 2031, Tau3: 290, SharedKeySize: 32, CiphertextSize: 1025, PublicKeySize: 897, PrivateKeySize: 1125},
+		{Name: "NTRULPR761", P: 761, Q: 4591, RoundedBytes: 1007, W: 250, Tau0: 2156, Tau1: 114, Tau2: 2007, Tau3: 287, SharedKeySize: 32, CiphertextSize: 1167, PublicKeySize: 1039, PrivateKeySize: 1294},
+		{Name: "NTRULPR857", P: 857, Q: 5167, RoundedBytes: 1152, W: 281, Tau0: 2433, Tau1: 101, Tau2: 2265, Tau3: 324, SharedKeySize: 32, CiphertextSize: 1312, PublicKeySize: 1184, PrivateKeySize: 1463},
+		{Name: "NTRULPR953", P: 953, Q: 6343, RoundedBytes: 1317, W: 345, Tau0: 2997, Tau1: 82, Tau2: 2798, Tau3: 400, SharedKeySize: 32, CiphertextSize: 1477, PublicKeySize: 1349, PrivateKeySize: 1652},
+		{Name: "NTRULPR1013", P: 1013, Q: 7177, RoundedBytes: 1423, W: 392, Tau0: 3367, Tau1: 73, Tau2: 3143, Tau3: 449, SharedKeySize: 32, CiphertextSize: 1583, PublicKeySize: 1455, PrivateKeySize: 1773},
+		{Name: "NTRULPR1277", P: 1277, Q: 7879, RoundedBytes: 1815, W: 429, Tau0: 3724, Tau1: 66, Tau2: 3469, Tau3: 496, SharedKeySize: 32, CiphertextSize: 1975, PublicKeySize: 1847, PrivateKeySize: 2231},
+	}
+	// TemplateWarning is the marker searched for in the formatted template
+	// output; everything before it is dropped before the file is written.
+	TemplateWarning = "// Code generated from"
+// main regenerates the params.go file of every instance: first the
+// Streamlined (sntrup) ones, then the LPR (ntrulpr) ones.
+func main() {
+	generatePackageFiles()
+	generateLPRFiles()
+// generatePackageFiles renders templates/sntrup.params.templ.go once per entry
+// of StreamlinedInstances and writes the result to <pkg>/params.go, where
+// <pkg> is the entry's Pkg(). The output is gofmt-formatted and everything
+// that precedes TemplateWarning (the template's build-ignore header) is
+// dropped. Any failure aborts the generator with a panic.
+func generatePackageFiles() {
+	const templatePath = "templates/sntrup.params.templ.go"
+	// Named tmpl so the text/template package is not shadowed.
+	tmpl, err := template.ParseFiles(templatePath)
+	if err != nil {
+		panic(err)
+	}
+	for _, mode := range StreamlinedInstances {
+		buf := new(bytes.Buffer)
+		if err := tmpl.Execute(buf, mode); err != nil {
+			panic(err)
+		}
+		// Formatting output code
+		code, err := format.Source(buf.Bytes())
+		if err != nil {
+			// Keep the underlying cause instead of discarding it.
+			panic("error formatting code for " + mode.Pkg() + ": " + err.Error())
+		}
+		res := string(code)
+		offset := strings.Index(res, TemplateWarning)
+		if offset == -1 {
+			panic("Missing template warning in " + templatePath)
+		}
+		err = ioutil.WriteFile(mode.Pkg()+"/params.go", []byte(res[offset:]), 0o600)
+		if err != nil {
+			panic(err)
+		}
+	}
+// generateLPRFiles renders templates/ntrulpr.params.templ.go once per entry of
+// LPRInstances and writes the result to <pkg>/params.go, where <pkg> is the
+// entry's Pkg(). The output is gofmt-formatted and everything that precedes
+// TemplateWarning (the template's build-ignore header) is dropped. Any
+// failure aborts the generator with a panic.
+func generateLPRFiles() {
+	const templatePath = "templates/ntrulpr.params.templ.go"
+	// Named tmpl so the text/template package is not shadowed.
+	tmpl, err := template.ParseFiles(templatePath)
+	if err != nil {
+		panic(err)
+	}
+	for _, mode := range LPRInstances {
+		buf := new(bytes.Buffer)
+		if err := tmpl.Execute(buf, mode); err != nil {
+			panic(err)
+		}
+		// Formatting output code
+		code, err := format.Source(buf.Bytes())
+		if err != nil {
+			// Keep the underlying cause instead of discarding it.
+			panic("error formatting code for " + mode.Pkg() + ": " + err.Error())
+		}
+		res := string(code)
+		offset := strings.Index(res, TemplateWarning)
+		if offset == -1 {
+			panic("Missing template warning in " + templatePath)
+		}
+		err = ioutil.WriteFile(mode.Pkg()+"/params.go", []byte(res[offset:]), 0o600)
+		if err != nil {
+			panic(err)
+		}
+	}
diff --git a/pke/ntruprime/kem/kem.go b/pke/ntruprime/kem/kem.go
new file mode 100644
index 000000000..59bfd92c4
--- /dev/null
+++ b/pke/ntruprime/kem/kem.go
@@ -0,0 +1,20 @@
+// Package kem provides a unified interface for Streamlined NTRU Prime KEM schemes.
+// A register of Streamlined NTRU Prime schemes is available in the package:
+// github.com/cloudflare/circl/pke/ntruprime/kem/schemes/sntrup
+package kem
+import (
+	"github.com/cloudflare/circl/internal/nist"
+	"github.com/cloudflare/circl/kem"
+// A Scheme represents a specific instance of an NTRU Prime KEM. It extends
+// kem.Scheme with a key-generation entry point driven by a NIST DRBG.
+type Scheme interface {
+	kem.Scheme
+	// DeriveKeyPairFromGen deterministically derives a pair of keys from a NIST DRBG.
+	// Only used for deterministic testing.
+	DeriveKeyPairFromGen(gen *nist.DRBG) (kem.PublicKey, kem.PrivateKey)
diff --git a/pke/ntruprime/kem/schemes/sntrup/schemes.go b/pke/ntruprime/kem/schemes/sntrup/schemes.go
new file mode 100644
index 000000000..1fc9b6f1e
--- /dev/null
+++ b/pke/ntruprime/kem/schemes/sntrup/schemes.go
@@ -0,0 +1,49 @@
+// Package sntrupSchemes contains a register of Streamlined NTRU Prime KEM schemes.
+// # Schemes Implemented
+// Post-quantum kems:
+//	sntrup653, sntrup761, sntrup857, sntrup953, sntrup1013, sntrup1277
+package sntrupSchemes
+import (
+	"strings"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup1013"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup1277"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup653"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup761"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup857"
+	"github.com/cloudflare/circl/kem/ntruprime/sntrup953"
+	"github.com/cloudflare/circl/pke/ntruprime/kem"
+// allSchemes is the fixed register of Streamlined NTRU Prime schemes, one
+// entry per sntrup package imported above. All returns a copy of it and init
+// indexes it by name.
+var allSchemes = [...]kem.Scheme{
+	sntrup653.SntrupScheme(),
+	sntrup761.SntrupScheme(),
+	sntrup857.SntrupScheme(),
+	sntrup953.SntrupScheme(),
+	sntrup1013.SntrupScheme(),
+	sntrup1277.SntrupScheme(),
+// allSchemeNames maps the lower-cased name of each scheme to the scheme; it is
+// filled once by init and read by ByName.
+var allSchemeNames map[string]kem.Scheme
+// init builds the name index: every registered scheme is stored under its
+// lower-cased Name().
+func init() {
+	allSchemeNames = make(map[string]kem.Scheme)
+	for i := range allSchemes {
+		s := allSchemes[i]
+		key := strings.ToLower(s.Name())
+		allSchemeNames[key] = s
+	}
+// ByName looks up a scheme by name, ignoring case.
+// It returns nil when no scheme is registered under that name.
+func ByName(name string) kem.Scheme {
+	key := strings.ToLower(name)
+	return allSchemeNames[key]
+// All returns every supported KEM scheme. The slice is backed by a fresh copy
+// of the register, so callers cannot modify the package's own array.
+func All() []kem.Scheme {
+	snapshot := allSchemes
+	return snapshot[:]
diff --git a/pke/ntruprime/ntrulpr1013/params.go b/pke/ntruprime/ntrulpr1013/params.go
new file mode 100644
index 000000000..0f767d5a9
--- /dev/null
+++ b/pke/ntruprime/ntrulpr1013/params.go
@@ -0,0 +1,31 @@
+// Code generated from ntrulpr.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this ntrulpr instance, substituted by the generator.
+const (
+	P            = 1013
+	Q            = 7177
+	RoundedBytes = 1423
+	W    = 392
+	Tau0 = 3367
+	Tau1 = 73
+	Tau2 = 3143
+	Tau3 = 449
+	// I is a literal in the template, not a per-instance value.
+	I = 256
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1583
+	// Size of a packed public key.
+	PublicKeySize = 1455
+	// Size of a packed private key.
+	PrivateKeySize = 1773
diff --git a/pke/ntruprime/ntrulpr1277/params.go b/pke/ntruprime/ntrulpr1277/params.go
new file mode 100644
index 000000000..3837b235b
--- /dev/null
+++ b/pke/ntruprime/ntrulpr1277/params.go
@@ -0,0 +1,31 @@
+// Code generated from ntrulpr.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this ntrulpr instance, substituted by the generator.
+const (
+	P            = 1277
+	Q            = 7879
+	RoundedBytes = 1815
+	W    = 429
+	Tau0 = 3724
+	Tau1 = 66
+	Tau2 = 3469
+	Tau3 = 496
+	// I is a literal in the template, not a per-instance value.
+	I = 256
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1975
+	// Size of a packed public key.
+	PublicKeySize = 1847
+	// Size of a packed private key.
+	PrivateKeySize = 2231
diff --git a/pke/ntruprime/ntrulpr653/params.go b/pke/ntruprime/ntrulpr653/params.go
new file mode 100644
index 000000000..01b2164de
--- /dev/null
+++ b/pke/ntruprime/ntrulpr653/params.go
@@ -0,0 +1,31 @@
+// Code generated from ntrulpr.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this ntrulpr instance, substituted by the generator.
+const (
+	P            = 653
+	Q            = 4621
+	RoundedBytes = 865
+	W    = 252
+	Tau0 = 2175
+	Tau1 = 113
+	Tau2 = 2031
+	Tau3 = 290
+	// I is a literal in the template, not a per-instance value.
+	I = 256
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1025
+	// Size of a packed public key.
+	PublicKeySize = 897
+	// Size of a packed private key.
+	PrivateKeySize = 1125
diff --git a/pke/ntruprime/ntrulpr761/params.go b/pke/ntruprime/ntrulpr761/params.go
new file mode 100644
index 000000000..085eeb002
--- /dev/null
+++ b/pke/ntruprime/ntrulpr761/params.go
@@ -0,0 +1,31 @@
+// Code generated from ntrulpr.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this ntrulpr instance, substituted by the generator.
+const (
+	P            = 761
+	Q            = 4591
+	RoundedBytes = 1007
+	W    = 250
+	Tau0 = 2156
+	Tau1 = 114
+	Tau2 = 2007
+	Tau3 = 287
+	// I is a literal in the template, not a per-instance value.
+	I = 256
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1167
+	// Size of a packed public key.
+	PublicKeySize = 1039
+	// Size of a packed private key.
+	PrivateKeySize = 1294
diff --git a/pke/ntruprime/ntrulpr857/params.go b/pke/ntruprime/ntrulpr857/params.go
new file mode 100644
index 000000000..d8bdaea78
--- /dev/null
+++ b/pke/ntruprime/ntrulpr857/params.go
@@ -0,0 +1,31 @@
+// Code generated from ntrulpr.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this ntrulpr instance, substituted by the generator.
+const (
+	P            = 857
+	Q            = 5167
+	RoundedBytes = 1152
+	W    = 281
+	Tau0 = 2433
+	Tau1 = 101
+	Tau2 = 2265
+	Tau3 = 324
+	// I is a literal in the template, not a per-instance value.
+	I = 256
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1312
+	// Size of a packed public key.
+	PublicKeySize = 1184
+	// Size of a packed private key.
+	PrivateKeySize = 1463
diff --git a/pke/ntruprime/ntrulpr953/params.go b/pke/ntruprime/ntrulpr953/params.go
new file mode 100644
index 000000000..236dac9bd
--- /dev/null
+++ b/pke/ntruprime/ntrulpr953/params.go
@@ -0,0 +1,31 @@
+// Code generated from ntrulpr.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this ntrulpr instance, substituted by the generator.
+const (
+	P            = 953
+	Q            = 6343
+	RoundedBytes = 1317
+	W    = 345
+	Tau0 = 2997
+	Tau1 = 82
+	Tau2 = 2798
+	Tau3 = 400
+	// I is a literal in the template, not a per-instance value.
+	I = 256
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1477
+	// Size of a packed public key.
+	PublicKeySize = 1349
+	// Size of a packed private key.
+	PrivateKeySize = 1652
diff --git a/pke/ntruprime/sntrup1013/params.go b/pke/ntruprime/sntrup1013/params.go
new file mode 100644
index 000000000..c0705606d
--- /dev/null
+++ b/pke/ntruprime/sntrup1013/params.go
@@ -0,0 +1,25 @@
+// Code generated from sntrup.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this sntrup instance, substituted by the generator.
+const (
+	P            = 1013
+	Q            = 7177
+	RoundedBytes = 1423
+	RqBytes      = 1623
+	W            = 448
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1455
+	// Size of a packed public key.
+	PublicKeySize = 1623
+	// Size of a packed private key.
+	PrivateKeySize = 2417
diff --git a/pke/ntruprime/sntrup1277/params.go b/pke/ntruprime/sntrup1277/params.go
new file mode 100644
index 000000000..ffce7712d
--- /dev/null
+++ b/pke/ntruprime/sntrup1277/params.go
@@ -0,0 +1,25 @@
+// Code generated from sntrup.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this sntrup instance, substituted by the generator.
+const (
+	P            = 1277
+	Q            = 7879
+	RoundedBytes = 1815
+	RqBytes      = 2067
+	W            = 492
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1847
+	// Size of a packed public key.
+	PublicKeySize = 2067
+	// Size of a packed private key.
+	PrivateKeySize = 3059
diff --git a/pke/ntruprime/sntrup653/params.go b/pke/ntruprime/sntrup653/params.go
new file mode 100644
index 000000000..c2623eb74
--- /dev/null
+++ b/pke/ntruprime/sntrup653/params.go
@@ -0,0 +1,25 @@
+// Code generated from sntrup.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this sntrup instance, substituted by the generator.
+const (
+	P            = 653
+	Q            = 4621
+	RoundedBytes = 865
+	RqBytes      = 994
+	W            = 288
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 897
+	// Size of a packed public key.
+	PublicKeySize = 994
+	// Size of a packed private key.
+	PrivateKeySize = 1518
diff --git a/pke/ntruprime/sntrup761/params.go b/pke/ntruprime/sntrup761/params.go
new file mode 100644
index 000000000..dde2fd93c
--- /dev/null
+++ b/pke/ntruprime/sntrup761/params.go
@@ -0,0 +1,25 @@
+// Code generated from sntrup.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this sntrup instance, substituted by the generator.
+const (
+	P            = 761
+	Q            = 4591
+	RoundedBytes = 1007
+	RqBytes      = 1158
+	W            = 286
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1039
+	// Size of a packed public key.
+	PublicKeySize = 1158
+	// Size of a packed private key.
+	PrivateKeySize = 1763
diff --git a/pke/ntruprime/sntrup857/params.go b/pke/ntruprime/sntrup857/params.go
new file mode 100644
index 000000000..c88d27812
--- /dev/null
+++ b/pke/ntruprime/sntrup857/params.go
@@ -0,0 +1,25 @@
+// Code generated from sntrup.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this sntrup instance, substituted by the generator.
+const (
+	P            = 857
+	Q            = 5167
+	RoundedBytes = 1152
+	RqBytes      = 1322
+	W            = 322
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1184
+	// Size of a packed public key.
+	PublicKeySize = 1322
+	// Size of a packed private key.
+	PrivateKeySize = 1999
diff --git a/pke/ntruprime/sntrup953/params.go b/pke/ntruprime/sntrup953/params.go
new file mode 100644
index 000000000..437b3efe6
--- /dev/null
+++ b/pke/ntruprime/sntrup953/params.go
@@ -0,0 +1,25 @@
+// Code generated from sntrup.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this sntrup instance, substituted by the generator.
+const (
+	P            = 953
+	Q            = 6343
+	RoundedBytes = 1317
+	RqBytes      = 1505
+	W            = 396
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = 32
+	// Size of the encapsulated shared key.
+	CiphertextSize = 1349
+	// Size of a packed public key.
+	PublicKeySize = 1505
+	// Size of a packed private key.
+	PrivateKeySize = 2254
diff --git a/pke/ntruprime/templates/ntrulpr.params.templ.go b/pke/ntruprime/templates/ntrulpr.params.templ.go
new file mode 100644
index 000000000..58dc9c1b6
--- /dev/null
+++ b/pke/ntruprime/templates/ntrulpr.params.templ.go
@@ -0,0 +1,35 @@
+// +build ignore
+// The previous line (and this one up to the warning below) is removed by the
+// template generator.
+// Code generated from ntrulpr.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this ntrulpr instance, substituted by the generator.
+const (
+	P             = {{.P}}
+	Q             = {{.Q}}
+	RoundedBytes = {{.RoundedBytes}}
+	W    = {{.W}}
+	Tau0 = {{.Tau0}}
+	Tau1 = {{.Tau1}}
+	Tau2 = {{.Tau2}}
+	Tau3 = {{.Tau3}}
+	// I is a literal in the template, not a per-instance value.
+	I = 256
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = {{.SharedKeySize}}
+	// Size of the encapsulated shared key.
+	CiphertextSize = {{.CiphertextSize}}
+	// Size of a packed public key.
+	PublicKeySize = {{.PublicKeySize}}
+	// Size of a packed private key.
+	PrivateKeySize = {{.PrivateKeySize}}
diff --git a/pke/ntruprime/templates/sntrup.params.templ.go b/pke/ntruprime/templates/sntrup.params.templ.go
new file mode 100644
index 000000000..7e890b5a4
--- /dev/null
+++ b/pke/ntruprime/templates/sntrup.params.templ.go
@@ -0,0 +1,28 @@
+// +build ignore
+// The previous line (and this one up to the warning below) is removed by the
+// template generator.
+// Code generated from sntrup.params.templ.go. DO NOT EDIT.
+package ntruprime
+// Parameter values of this sntrup instance, substituted by the generator.
+const (
+	P             = {{.P}}
+	Q             = {{.Q}}
+	RoundedBytes = {{.RoundedBytes}}
+	RqBytes      = {{.RqBytes}}
+	W             = {{.W}}
+// Key, ciphertext and shared-key sizes of this instance.
+const (
+	// Size of the established shared key.
+	SharedKeySize = {{.SharedKeySize}}
+	// Size of the encapsulated shared key.
+	CiphertextSize = {{.CiphertextSize}}
+	// Size of a packed public key.
+	PublicKeySize = {{.PublicKeySize}}
+	// Size of a packed private key.
+	PrivateKeySize = {{.PrivateKeySize}}