// Copyright 2020-2025 Consensys Software Inc. // Licensed under the Apache License, Version 2.0. See the LICENSE file for details. // Code generated by consensys/gnark-crypto DO NOT EDIT package fp import ( "crypto/rand" "encoding/binary" "errors" "io" "math/big" "math/bits" "reflect" "strconv" "strings" "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/field/hash" "github.com/consensys/gnark-crypto/field/pool" ) // Element represents a field element stored on 6 words (uint64) // // Element are assumed to be in Montgomery form in all methods. // // Modulus q = // // q[base10] = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787 // q[base16] = 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab // // # Warning // // This code has not been audited and is provided as-is. In particular, there is no security guarantees such as constant time implementation or side-channel attack resistance. 
type Element [6]uint64 const ( Limbs = 6 // number of 64 bits words needed to represent a Element Bits = 381 // number of bits needed to represent a Element Bytes = 48 // number of bytes needed to represent a Element ) // Field modulus q const ( q0 = 13402431016077863595 q1 = 2210141511517208575 q2 = 7435674573564081700 q3 = 7239337960414712511 q4 = 5412103778470702295 q5 = 1873798617647539866 ) var qElement = Element{ q0, q1, q2, q3, q4, q5, } var _modulus big.Int // q stored as big.Int // Modulus returns q as a big.Int // // q[base10] = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787 // q[base16] = 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab func Modulus() *big.Int { return new(big.Int).Set(&_modulus) } // q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r // used for Montgomery reduction const qInvNeg = 9940570264628428797 func init() { _modulus.SetString("1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", 16) } // NewElement returns a new Element from a uint64 value // // it is equivalent to // // var v Element // v.SetUint64(...) func NewElement(v uint64) Element { z := Element{v} z.Mul(&z, &rSquare) return z } // SetUint64 sets z to v and returns z func (z *Element) SetUint64(v uint64) *Element { // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form *z = Element{v} return z.Mul(z, &rSquare) // z.toMont() } // SetInt64 sets z to v and returns z func (z *Element) SetInt64(v int64) *Element { // absolute value of v m := v >> 63 z.SetUint64(uint64((v ^ m) - m)) if m != 0 { // v is negative z.Neg(z) } return z } // Set z = x and returns z func (z *Element) Set(x *Element) *Element { z[0] = x[0] z[1] = x[1] z[2] = x[2] z[3] = x[3] z[4] = x[4] z[5] = x[5] return z } // SetInterface converts provided interface into Element // returns an error if provided type is not supported. 
// supported types: // // Element // *Element // uint64 // int // string (see SetString for valid formats) // *big.Int // big.Int // []byte func (z *Element) SetInterface(i1 interface{}) (*Element, error) { if i1 == nil { return nil, errors.New("can't set fp.Element with ") } switch c1 := i1.(type) { case Element: return z.Set(&c1), nil case *Element: if c1 == nil { return nil, errors.New("can't set fp.Element with ") } return z.Set(c1), nil case uint8: return z.SetUint64(uint64(c1)), nil case uint16: return z.SetUint64(uint64(c1)), nil case uint32: return z.SetUint64(uint64(c1)), nil case uint: return z.SetUint64(uint64(c1)), nil case uint64: return z.SetUint64(c1), nil case int8: return z.SetInt64(int64(c1)), nil case int16: return z.SetInt64(int64(c1)), nil case int32: return z.SetInt64(int64(c1)), nil case int64: return z.SetInt64(c1), nil case int: return z.SetInt64(int64(c1)), nil case string: return z.SetString(c1) case *big.Int: if c1 == nil { return nil, errors.New("can't set fp.Element with ") } return z.SetBigInt(c1), nil case big.Int: return z.SetBigInt(&c1), nil case []byte: return z.SetBytes(c1), nil default: return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) } } // SetZero z = 0 func (z *Element) SetZero() *Element { z[0] = 0 z[1] = 0 z[2] = 0 z[3] = 0 z[4] = 0 z[5] = 0 return z } // SetOne z = 1 (in Montgomery form) func (z *Element) SetOne() *Element { z[0] = 8505329371266088957 z[1] = 17002214543764226050 z[2] = 6865905132761471162 z[3] = 8632934651105793861 z[4] = 6631298214892334189 z[5] = 1582556514881692819 return z } // Div z = x*y⁻¹ (mod q) func (z *Element) Div(x, y *Element) *Element { var yInv Element yInv.Inverse(y) z.Mul(x, &yInv) return z } // Equal returns z == x; constant-time func (z *Element) Equal(x *Element) bool { return z.NotEqual(x) == 0 } // NotEqual returns 0 if and only if z == x; constant-time func (z *Element) NotEqual(x *Element) uint64 { return (z[5] ^ x[5]) | (z[4] ^ x[4]) | (z[3] ^ 
x[3]) | (z[2] ^ x[2]) | (z[1] ^ x[1]) | (z[0] ^ x[0]) } // IsZero returns z == 0 func (z *Element) IsZero() bool { return (z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 } // IsOne returns z == 1 func (z *Element) IsOne() bool { return ((z[5] ^ 1582556514881692819) | (z[4] ^ 6631298214892334189) | (z[3] ^ 8632934651105793861) | (z[2] ^ 6865905132761471162) | (z[1] ^ 17002214543764226050) | (z[0] ^ 8505329371266088957)) == 0 } // IsUint64 reports whether z can be represented as an uint64. func (z *Element) IsUint64() bool { zz := *z zz.fromMont() return zz.FitsOnOneWord() } // Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. func (z *Element) Uint64() uint64 { return z.Bits()[0] } // FitsOnOneWord reports whether z words (except the least significant word) are 0 // // It is the responsibility of the caller to convert from Montgomery to Regular form if needed. func (z *Element) FitsOnOneWord() bool { return (z[5] | z[4] | z[3] | z[2] | z[1]) == 0 } // Cmp compares (lexicographic order) z and x and returns: // // -1 if z < x // 0 if z == x // +1 if z > x func (z *Element) Cmp(x *Element) int { _z := z.Bits() _x := x.Bits() if _z[5] > _x[5] { return 1 } else if _z[5] < _x[5] { return -1 } if _z[4] > _x[4] { return 1 } else if _z[4] < _x[4] { return -1 } if _z[3] > _x[3] { return 1 } else if _z[3] < _x[3] { return -1 } if _z[2] > _x[2] { return 1 } else if _z[2] < _x[2] { return -1 } if _z[1] > _x[1] { return 1 } else if _z[1] < _x[1] { return -1 } if _z[0] > _x[0] { return 1 } else if _z[0] < _x[0] { return -1 } return 0 } // LexicographicallyLargest returns true if this element is strictly lexicographically // larger than its negation, false otherwise func (z *Element) LexicographicallyLargest() bool { // adapted from github.com/zkcrypto/bls12_381 // we check if the element is larger than (q-1) / 2 // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 _z := z.Bits() var b uint64 _, b = 
bits.Sub64(_z[0], 15924587544893707606, 0) _, b = bits.Sub64(_z[1], 1105070755758604287, b) _, b = bits.Sub64(_z[2], 12941209323636816658, b) _, b = bits.Sub64(_z[3], 12843041017062132063, b) _, b = bits.Sub64(_z[4], 2706051889235351147, b) _, b = bits.Sub64(_z[5], 936899308823769933, b) return b == 0 } // SetRandom sets z to a uniform random value in [0, q). // // This might error only if reading from crypto/rand.Reader errors, // in which case, value of z is undefined. func (z *Element) SetRandom() (*Element, error) { // this code is generated for all modulus // and derived from go/src/crypto/rand/util.go // l is number of limbs * 8; the number of bytes needed to reconstruct 6 uint64 const l = 48 // bitLen is the maximum bit length needed to encode a value < q. const bitLen = 381 // k is the maximum byte length needed to encode a value < q. const k = (bitLen + 7) / 8 // b is the number of bits in the most significant byte of q-1. b := uint(bitLen % 8) if b == 0 { b = 8 } var bytes [l]byte for { // note that bytes[k:l] is always 0 if _, err := io.ReadFull(rand.Reader, bytes[:k]); err != nil { return nil, err } // Clear unused bits in in the most significant byte to increase probability // that the candidate is < q. bytes[k-1] &= uint8(int(1<> 1 z[0] = z[0]>>1 | z[1]<<63 z[1] = z[1]>>1 | z[2]<<63 z[2] = z[2]>>1 | z[3]<<63 z[3] = z[3]>>1 | z[4]<<63 z[4] = z[4]>>1 | z[5]<<63 z[5] >>= 1 } // fromMont converts z in place (i.e. 
// mutates) from Montgomery to regular representation
// sets and returns z = z * 1
func (z *Element) fromMont() *Element {
	fromMont(z)
	return z
}

// Add z = x + y (mod q)
//
// The six words are added with carry propagation, then q is subtracted once
// if the sum is not smaller than q. The carry out of the top word is discarded.
func (z *Element) Add(x, y *Element) *Element {

	var carry uint64
	z[0], carry = bits.Add64(x[0], y[0], 0)
	z[1], carry = bits.Add64(x[1], y[1], carry)
	z[2], carry = bits.Add64(x[2], y[2], carry)
	z[3], carry = bits.Add64(x[3], y[3], carry)
	z[4], carry = bits.Add64(x[4], y[4], carry)
	z[5], _ = bits.Add64(x[5], y[5], carry)

	// if z ⩾ q → z -= q
	if !z.smallerThanModulus() {
		var b uint64
		z[0], b = bits.Sub64(z[0], q0, 0)
		z[1], b = bits.Sub64(z[1], q1, b)
		z[2], b = bits.Sub64(z[2], q2, b)
		z[3], b = bits.Sub64(z[3], q3, b)
		z[4], b = bits.Sub64(z[4], q4, b)
		z[5], _ = bits.Sub64(z[5], q5, b)
	}
	return z
}

// Double z = x + x (mod q), aka Lsh 1
//
// Same structure as Add with both operands equal to x.
func (z *Element) Double(x *Element) *Element {

	var carry uint64
	z[0], carry = bits.Add64(x[0], x[0], 0)
	z[1], carry = bits.Add64(x[1], x[1], carry)
	z[2], carry = bits.Add64(x[2], x[2], carry)
	z[3], carry = bits.Add64(x[3], x[3], carry)
	z[4], carry = bits.Add64(x[4], x[4], carry)
	z[5], _ = bits.Add64(x[5], x[5], carry)

	// if z ⩾ q → z -= q
	if !z.smallerThanModulus() {
		var b uint64
		z[0], b = bits.Sub64(z[0], q0, 0)
		z[1], b = bits.Sub64(z[1], q1, b)
		z[2], b = bits.Sub64(z[2], q2, b)
		z[3], b = bits.Sub64(z[3], q3, b)
		z[4], b = bits.Sub64(z[4], q4, b)
		z[5], _ = bits.Sub64(z[5], q5, b)
	}
	return z
}

// Sub z = x - y (mod q)
//
// The words are subtracted with borrow propagation; if the subtraction
// underflows (final borrow set), q is added back once.
func (z *Element) Sub(x, y *Element) *Element {
	var b uint64
	z[0], b = bits.Sub64(x[0], y[0], 0)
	z[1], b = bits.Sub64(x[1], y[1], b)
	z[2], b = bits.Sub64(x[2], y[2], b)
	z[3], b = bits.Sub64(x[3], y[3], b)
	z[4], b = bits.Sub64(x[4], y[4], b)
	z[5], b = bits.Sub64(x[5], y[5], b)
	if b != 0 {
		// x < y: wrap around by adding the modulus
		var c uint64
		z[0], c = bits.Add64(z[0], q0, 0)
		z[1], c = bits.Add64(z[1], q1, c)
		z[2], c = bits.Add64(z[2], q2, c)
		z[3], c = bits.Add64(z[3], q3, c)
		z[4], c = bits.Add64(z[4], q4, c)
		z[5], _ = bits.Add64(z[5], q5, c)
	}
	return z
}

// Neg z = q - x
//
// When x is zero the result is zero (not q).
func (z *Element) Neg(x *Element) *Element {
	if x.IsZero() {
		z.SetZero()
		return z
	}
	var borrow uint64
	z[0], borrow = bits.Sub64(q0, x[0], 0)
	z[1], borrow = bits.Sub64(q1, x[1], borrow)
	z[2], borrow = bits.Sub64(q2, x[2], borrow)
	z[3], borrow = bits.Sub64(q3, x[3], borrow)
	z[4], borrow = bits.Sub64(q4, x[4], borrow)
	z[5], _ = bits.Sub64(q5, x[5], borrow)
	return z
}

// Select is a constant-time conditional move.
// If c=0, z = x0. Else z = x1
func (z *Element) Select(c int, x0 *Element, x1 *Element) *Element {
	cC := uint64((int64(c) | -int64(c)) >> 63) // "canonicized" into: 0 if c=0, -1 otherwise
	// for each word: x0 ^ (mask & (x0 ^ x1)) is x0 when mask is 0 and x1 when mask is all ones
	z[0] = x0[0] ^ cC&(x0[0]^x1[0])
	z[1] = x0[1] ^ cC&(x0[1]^x1[1])
	z[2] = x0[2] ^ cC&(x0[2]^x1[2])
	z[3] = x0[3] ^ cC&(x0[3]^x1[3])
	z[4] = x0[4] ^ cC&(x0[4]^x1[4])
	z[5] = x0[5] ^ cC&(x0[5]^x1[5])
	return z
}

// _mulGeneric is unoptimized textbook CIOS
// it is a fallback solution on x86 when ADX instruction set is not available
// and is used for testing purposes.
//
// The body is six unrolled rounds, one per word of y. Each round accumulates
// y[i]*x into the 7-word buffer t ("First loop"), then adds m*q with
// m = t[0]*qInvNeg and drops the lowest word ("Second loop"). The result is
// written to z after a final conditional subtraction of q.
// NOTE(review): madd0, madd1 and madd2 are defined outside this view; they are
// presumably multiply-accumulate helpers returning (hi, lo) — confirm.
func _mulGeneric(z, x, y *Element) {

	// Algorithm 2 of "Faster Montgomery Multiplication and Multi-Scalar-Multiplication for SNARKS"
	// by Y. El Housni and G. Botrel https://doi.org/10.46586/tches.v2023.i3.504-521

	var t [7]uint64 // accumulator; starts at zero
	var D uint64    // carry out of the top word of the first loop
	var m, C uint64 // m: reduction multiplier for the round, C: running carry
	// -----------------------------------
	// First loop

	C, t[0] = bits.Mul64(y[0], x[0])
	C, t[1] = madd1(y[0], x[1], C)
	C, t[2] = madd1(y[0], x[2], C)
	C, t[3] = madd1(y[0], x[3], C)
	C, t[4] = madd1(y[0], x[4], C)
	C, t[5] = madd1(y[0], x[5], C)

	t[6], D = bits.Add64(t[6], C, 0)

	// m = t[0]n'[0] mod W
	m = t[0] * qInvNeg

	// -----------------------------------
	// Second loop
	C = madd0(m, q0, t[0])
	C, t[0] = madd2(m, q1, t[1], C)
	C, t[1] = madd2(m, q2, t[2], C)
	C, t[2] = madd2(m, q3, t[3], C)
	C, t[3] = madd2(m, q4, t[4], C)
	C, t[4] = madd2(m, q5, t[5], C)

	t[5], C = bits.Add64(t[6], C, 0)
	t[6], _ = bits.Add64(0, D, C)
	// -----------------------------------
	// First loop

	C, t[0] = madd1(y[1], x[0], t[0])
	C, t[1] = madd2(y[1], x[1], t[1], C)
	C, t[2] = madd2(y[1], x[2], t[2], C)
	C, t[3] = madd2(y[1], x[3], t[3], C)
	C, t[4] = madd2(y[1], x[4], t[4], C)
	C, t[5] = madd2(y[1], x[5], t[5], C)

	t[6], D = bits.Add64(t[6], C, 0)

	// m = t[0]n'[0] mod W
	m = t[0] * qInvNeg

	// -----------------------------------
	// Second loop
	C = madd0(m, q0, t[0])
	C, t[0] = madd2(m, q1, t[1], C)
	C, t[1] = madd2(m, q2, t[2], C)
	C, t[2] = madd2(m, q3, t[3], C)
	C, t[3] = madd2(m, q4, t[4], C)
	C, t[4] = madd2(m, q5, t[5], C)

	t[5], C = bits.Add64(t[6], C, 0)
	t[6], _ = bits.Add64(0, D, C)
	// -----------------------------------
	// First loop

	C, t[0] = madd1(y[2], x[0], t[0])
	C, t[1] = madd2(y[2], x[1], t[1], C)
	C, t[2] = madd2(y[2], x[2], t[2], C)
	C, t[3] = madd2(y[2], x[3], t[3], C)
	C, t[4] = madd2(y[2], x[4], t[4], C)
	C, t[5] = madd2(y[2], x[5], t[5], C)

	t[6], D = bits.Add64(t[6], C, 0)

	// m = t[0]n'[0] mod W
	m = t[0] * qInvNeg

	// -----------------------------------
	// Second loop
	C = madd0(m, q0, t[0])
	C, t[0] = madd2(m, q1, t[1], C)
	C, t[1] = madd2(m, q2, t[2], C)
	C, t[2] = madd2(m, q3, t[3], C)
	C, t[3] = madd2(m, q4, t[4], C)
	C, t[4] = madd2(m, q5, t[5], C)

	t[5], C = bits.Add64(t[6], C, 0)
	t[6], _ = bits.Add64(0, D, C)
	// -----------------------------------
	// First loop

	C, t[0] = madd1(y[3], x[0], t[0])
	C, t[1] = madd2(y[3], x[1], t[1], C)
	C, t[2] = madd2(y[3], x[2], t[2], C)
	C, t[3] = madd2(y[3], x[3], t[3], C)
	C, t[4] = madd2(y[3], x[4], t[4], C)
	C, t[5] = madd2(y[3], x[5], t[5], C)

	t[6], D = bits.Add64(t[6], C, 0)

	// m = t[0]n'[0] mod W
	m = t[0] * qInvNeg

	// -----------------------------------
	// Second loop
	C = madd0(m, q0, t[0])
	C, t[0] = madd2(m, q1, t[1], C)
	C, t[1] = madd2(m, q2, t[2], C)
	C, t[2] = madd2(m, q3, t[3], C)
	C, t[3] = madd2(m, q4, t[4], C)
	C, t[4] = madd2(m, q5, t[5], C)

	t[5], C = bits.Add64(t[6], C, 0)
	t[6], _ = bits.Add64(0, D, C)
	// -----------------------------------
	// First loop

	C, t[0] = madd1(y[4], x[0], t[0])
	C, t[1] = madd2(y[4], x[1], t[1], C)
	C, t[2] = madd2(y[4], x[2], t[2], C)
	C, t[3] = madd2(y[4], x[3], t[3], C)
	C, t[4] = madd2(y[4], x[4], t[4], C)
	C, t[5] = madd2(y[4], x[5], t[5], C)

	t[6], D = bits.Add64(t[6], C, 0)

	// m = t[0]n'[0] mod W
	m = t[0] * qInvNeg

	// -----------------------------------
	// Second loop
	C = madd0(m, q0, t[0])
	C, t[0] = madd2(m, q1, t[1], C)
	C, t[1] = madd2(m, q2, t[2], C)
	C, t[2] = madd2(m, q3, t[3], C)
	C, t[3] = madd2(m, q4, t[4], C)
	C, t[4] = madd2(m, q5, t[5], C)

	t[5], C = bits.Add64(t[6], C, 0)
	t[6], _ = bits.Add64(0, D, C)
	// -----------------------------------
	// First loop

	C, t[0] = madd1(y[5], x[0], t[0])
	C, t[1] = madd2(y[5], x[1], t[1], C)
	C, t[2] = madd2(y[5], x[2], t[2], C)
	C, t[3] = madd2(y[5], x[3], t[3], C)
	C, t[4] = madd2(y[5], x[4], t[4], C)
	C, t[5] = madd2(y[5], x[5], t[5], C)

	t[6], D = bits.Add64(t[6], C, 0)

	// m = t[0]n'[0] mod W
	m = t[0] * qInvNeg

	// -----------------------------------
	// Second loop
	C = madd0(m, q0, t[0])
	C, t[0] = madd2(m, q1, t[1], C)
	C, t[1] = madd2(m, q2, t[2], C)
	C, t[2] = madd2(m, q3, t[3], C)
	C, t[3] = madd2(m, q4, t[4], C)
	C, t[4] = madd2(m, q5, t[5], C)

	t[5], C = bits.Add64(t[6], C, 0)
	t[6], _ = bits.Add64(0, D, C)

	if t[6] != 0 {
		// we need to reduce, we have a result on 7 words
		var b uint64
		z[0], b = bits.Sub64(t[0], q0, 0)
		z[1], b = bits.Sub64(t[1], q1, b)
		z[2], b = bits.Sub64(t[2], q2, b)
		z[3], b = bits.Sub64(t[3], q3, b)
		z[4], b = bits.Sub64(t[4], q4, b)
		z[5], _ = bits.Sub64(t[5], q5, b)
		return
	}

	// copy t into z
	z[0] = t[0]
	z[1] = t[1]
	z[2] = t[2]
	z[3] = t[3]
	z[4] = t[4]
	z[5] = t[5]

	// if z ⩾ q → z -= q
	if !z.smallerThanModulus() {
		var b uint64
		z[0], b = bits.Sub64(z[0], q0, 0)
		z[1], b = bits.Sub64(z[1], q1, b)
		z[2], b = bits.Sub64(z[2], q2, b)
		z[3], b = bits.Sub64(z[3], q3, b)
		z[4], b = bits.Sub64(z[4], q4, b)
		z[5], _ = bits.Sub64(z[5], q5, b)
	}
}

// _fromMontGeneric converts z in place from Montgomery to regular form.
//
// It runs six identical reduction rounds (one per word), each of which adds
// m*q with m = z[0]*qInvNeg and shifts z down by one word, followed by a
// final conditional subtraction of q.
func _fromMontGeneric(z *Element) {
	// the following lines implement z = z * 1
	// with a modified CIOS montgomery multiplication
	// see Mul for algorithm documentation
	{
		// m = z[0]n'[0] mod W
		m := z[0] * qInvNeg
		C := madd0(m, q0, z[0])
		C, z[0] = madd2(m, q1, z[1], C)
		C, z[1] = madd2(m, q2, z[2], C)
		C, z[2] = madd2(m, q3, z[3], C)
		C, z[3] = madd2(m, q4, z[4], C)
		C, z[4] = madd2(m, q5, z[5], C)
		z[5] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * qInvNeg
		C := madd0(m, q0, z[0])
		C, z[0] = madd2(m, q1, z[1], C)
		C, z[1] = madd2(m, q2, z[2], C)
		C, z[2] = madd2(m, q3, z[3], C)
		C, z[3] = madd2(m, q4, z[4], C)
		C, z[4] = madd2(m, q5, z[5], C)
		z[5] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * qInvNeg
		C := madd0(m, q0, z[0])
		C, z[0] = madd2(m, q1, z[1], C)
		C, z[1] = madd2(m, q2, z[2], C)
		C, z[2] = madd2(m, q3, z[3], C)
		C, z[3] = madd2(m, q4, z[4], C)
		C, z[4] = madd2(m, q5, z[5], C)
		z[5] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * qInvNeg
		C := madd0(m, q0, z[0])
		C, z[0] = madd2(m, q1, z[1], C)
		C, z[1] = madd2(m, q2, z[2], C)
		C, z[2] = madd2(m, q3, z[3], C)
		C, z[3] = madd2(m, q4, z[4], C)
		C, z[4] = madd2(m, q5, z[5], C)
		z[5] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * qInvNeg
		C := madd0(m, q0, z[0])
		C, z[0] = madd2(m, q1, z[1], C)
		C, z[1] = madd2(m, q2, z[2], C)
		C, z[2] = madd2(m, q3, z[3], C)
		C, z[3] = madd2(m, q4, z[4], C)
		C, z[4] = madd2(m, q5, z[5], C)
		z[5] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * qInvNeg
		C := madd0(m, q0, z[0])
		C, z[0] = madd2(m, q1, z[1], C)
		C, z[1] = madd2(m, q2, z[2], C)
		C, z[2] = madd2(m, q3, z[3], C)
		C, z[3] = madd2(m, q4, z[4], C)
		C, z[4] = madd2(m, q5, z[5], C)
		z[5] = C
	}

	// if z ⩾ q → z -= q
	if !z.smallerThanModulus() {
		var b uint64
		z[0], b = bits.Sub64(z[0], q0, 0)
		z[1], b = bits.Sub64(z[1], q1, b)
		z[2], b = bits.Sub64(z[2], q2, b)
		z[3], b = bits.Sub64(z[3], q3, b)
		z[4], b = bits.Sub64(z[4], q4, b)
		z[5], _ = bits.Sub64(z[5], q5, b)
	}
}

// _reduceGeneric subtracts q from z once, in place, when z is not smaller than q.
func _reduceGeneric(z *Element) {

	// if z ⩾ q → z -= q
	if !z.smallerThanModulus() {
		var b uint64
		z[0], b = bits.Sub64(z[0], q0, 0)
		z[1], b = bits.Sub64(z[1], q1, b)
		z[2], b = bits.Sub64(z[2], q2, b)
		z[3], b = bits.Sub64(z[3], q3, b)
		z[4], b = bits.Sub64(z[4], q4, b)
		z[5], _ = bits.Sub64(z[5], q5, b)
	}
}

// BatchInvert returns a new slice with every element inverted.
// Uses Montgomery batch inversion trick
//
// The input slice is only read. Zero entries are skipped and stay zero in the
// result; a single call to Inverse is made for the whole batch.
func BatchInvert(a []Element) []Element {
	res := make([]Element, len(a))
	if len(a) == 0 {
		return res
	}

	// zeroes records the indices of zero inputs so both passes can skip them
	zeroes := bitset.New(uint(len(a)))
	accumulator := One()

	// forward pass: res[i] holds the product of all non-zero a[j] with j < i
	for i := 0; i < len(a); i++ {
		if a[i].IsZero() {
			zeroes.Set(uint(i))
			continue
		}
		res[i] = accumulator
		accumulator.Mul(&accumulator, &a[i])
	}

	// invert the product of all non-zero inputs
	accumulator.Inverse(&accumulator)

	// backward pass: peel off one factor at a time to recover each inverse
	for i := len(a) - 1; i >= 0; i-- {
		if zeroes.Test(uint(i)) {
			continue
		}
		res[i].Mul(&res[i], &accumulator)
		accumulator.Mul(&accumulator, &a[i])
	}

	return res
}

// _butterflyGeneric sets a = a + b and b = a - b, using the value a had on entry
// for the subtraction.
func _butterflyGeneric(a, b *Element) {
	t := *a
	a.Add(a, b)
	b.Sub(&t, b)
}

// BitLen returns the minimum number of bits needed to represent z
// returns 0 if z == 0
//
// The words of z are inspected as stored; no conversion out of Montgomery form
// is performed here.
func (z *Element) BitLen() int {
	if z[5] != 0 {
		return 320 + bits.Len64(z[5])
	}
	if z[4] != 0 {
		return 256 + bits.Len64(z[4])
	}
	if z[3] != 0 {
		return 192 + bits.Len64(z[3])
	}
	if z[2] != 0 {
		return 128 + bits.Len64(z[2])
	}
	if z[1] != 0 {
		return 64 + bits.Len64(z[1])
	}
	return bits.Len64(z[0])
}

// Hash msg to count prime field elements.
// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 func Hash(msg, dst []byte, count int) ([]Element, error) { // 128 bits of security // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 const Bytes = 1 + (Bits-1)/8 const L = 16 + Bytes lenInBytes := count * L pseudoRandomBytes, err := hash.ExpandMsgXmd(msg, dst, lenInBytes) if err != nil { return nil, err } // get temporary big int from the pool vv := pool.BigInt.Get() res := make([]Element, count) for i := 0; i < count; i++ { vv.SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) res[i].SetBigInt(vv) } // release object into pool pool.BigInt.Put(vv) return res, nil } // Exp z = xᵏ (mod q) func (z *Element) Exp(x Element, k *big.Int) *Element { if k.IsUint64() && k.Uint64() == 0 { return z.SetOne() } e := k if k.Sign() == -1 { // negative k, we invert // if k < 0: xᵏ (mod q) == (x⁻¹)ᵏ (mod q) x.Inverse(&x) // we negate k in a temp big.Int since // Int.Bit(_) of k and -k is different e = pool.BigInt.Get() defer pool.BigInt.Put(e) e.Neg(k) } z.Set(&x) for i := e.BitLen() - 2; i >= 0; i-- { z.Square(z) if e.Bit(i) == 1 { z.Mul(z, &x) } } return z } // rSquare where r is the Montgommery constant // see section 2.3.2 of Tolga Acar's thesis // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf var rSquare = Element{ 17644856173732828998, 754043588434789617, 10224657059481499349, 7488229067341005760, 11130996698012816685, 1267921511277847466, } // toMont converts z to Montgomery form // sets and returns z = z * r² func (z *Element) toMont() *Element { return z.Mul(z, &rSquare) } // String returns the decimal representation of z as generated by // z.Text(10). 
func (z *Element) String() string {
	return z.Text(10)
}

// toBigInt returns z as a big.Int in Montgomery form
//
// The six words of z are serialized big-endian exactly as stored (no conversion
// is applied) and loaded into res, which is also returned.
func (z *Element) toBigInt(res *big.Int) *big.Int {
	var b [Bytes]byte
	// word i is the i-th least significant word, so it lands in the i-th
	// 8-byte group counted from the end of the buffer
	for i := range z {
		hi := Bytes - 8*i
		binary.BigEndian.PutUint64(b[hi-8:hi], z[i])
	}

	return res.SetBytes(b[:])
}

// Text returns the string representation of z in the given base.
// Base must be between 2 and 36, inclusive. The result uses the
// lower-case letters 'a' to 'z' for digit values 10 to 35.
// No prefix (such as "0x") is added to the string. If z is a nil
// pointer it returns "<nil>".
// If base == 10 and -z fits in a uint16 prefix "-" is added to the string.
//
// Text panics if base is out of range; that check happens before the nil check.
func (z *Element) Text(base int) string {
	if base < 2 || base > 36 {
		panic("invalid base")
	}
	if z == nil {
		// same convention as math/big's Int.Text for a nil receiver
		return "<nil>"
	}

	const maxUint16 = 65535
	if base == 10 {
		// small negative values are printed as "-n" rather than as q-n
		var zzNeg Element
		zzNeg.Neg(z)
		zzNeg.fromMont()
		if zzNeg.FitsOnOneWord() && zzNeg[0] <= maxUint16 && zzNeg[0] != 0 {
			return "-" + strconv.FormatUint(zzNeg[0], base)
		}
	}

	zz := *z
	zz.fromMont()
	if zz.FitsOnOneWord() {
		// single-word values do not need a big.Int
		return strconv.FormatUint(zz[0], base)
	}

	vv := pool.BigInt.Get()
	r := zz.toBigInt(vv).Text(base)
	pool.BigInt.Put(vv)
	return r
}

// BigInt sets and return z as a *big.Int
//
// z itself is left untouched: the conversion out of Montgomery form is done on a copy.
func (z *Element) BigInt(res *big.Int) *big.Int {
	_z := *z
	_z.fromMont()
	return _z.toBigInt(res)
}

// ToBigIntRegular returns z as a big.Int in regular form
//
// Deprecated: use BigInt(*big.Int) instead
func (z Element) ToBigIntRegular(res *big.Int) *big.Int {
	// value receiver: fromMont only mutates the local copy
	z.fromMont()
	return z.toBigInt(res)
}

// Bits provides access to z by returning its value as a little-endian [6]uint64 array.
// Bits is intended to support implementation of missing low-level Element
// functionality outside this package; it should be avoided otherwise.
func (z *Element) Bits() [6]uint64 { _z := *z fromMont(&_z) return _z } // Bytes returns the value of z as a big-endian byte array func (z *Element) Bytes() (res [Bytes]byte) { BigEndian.PutElement(&res, *z) return } // Marshal returns the value of z as a big-endian byte slice func (z *Element) Marshal() []byte { b := z.Bytes() return b[:] } // Unmarshal is an alias for SetBytes, it sets z to the value of e. func (z *Element) Unmarshal(e []byte) { z.SetBytes(e) } // SetBytes interprets e as the bytes of a big-endian unsigned integer, // sets z to that value, and returns z. func (z *Element) SetBytes(e []byte) *Element { if len(e) == Bytes { // fast path v, err := BigEndian.Element((*[Bytes]byte)(e)) if err == nil { *z = v return z } } // slow path. // get a big int from our pool vv := pool.BigInt.Get() vv.SetBytes(e) // set big int z.SetBigInt(vv) // put temporary object back in pool pool.BigInt.Put(vv) return z } // SetBytesCanonical interprets e as the bytes of a big-endian 48-byte integer. // If e is not a 48-byte slice or encodes a value higher than q, // SetBytesCanonical returns an error. 
func (z *Element) SetBytesCanonical(e []byte) error { if len(e) != Bytes { return errors.New("invalid fp.Element encoding") } v, err := BigEndian.Element((*[Bytes]byte)(e)) if err != nil { return err } *z = v return nil } // SetBigInt sets z to v and returns z func (z *Element) SetBigInt(v *big.Int) *Element { z.SetZero() var zero big.Int // fast path c := v.Cmp(&_modulus) if c == 0 { // v == 0 return z } else if c != 1 && v.Cmp(&zero) != -1 { // 0 <= v < q return z.setBigInt(v) } // get temporary big int from the pool vv := pool.BigInt.Get() // copy input + modular reduction vv.Mod(v, &_modulus) // set big int byte value z.setBigInt(vv) // release object into pool pool.BigInt.Put(vv) return z } // setBigInt assumes 0 ⩽ v < q func (z *Element) setBigInt(v *big.Int) *Element { vBits := v.Bits() if bits.UintSize == 64 { for i := 0; i < len(vBits); i++ { z[i] = uint64(vBits[i]) } } else { for i := 0; i < len(vBits); i++ { if i%2 == 0 { z[i/2] = uint64(vBits[i]) } else { z[i/2] |= uint64(vBits[i]) << 32 } } } return z.toMont() } // SetString creates a big.Int with number and calls SetBigInt on z // // The number prefix determines the actual base: A prefix of // ”0b” or ”0B” selects base 2, ”0”, ”0o” or ”0O” selects base 8, // and ”0x” or ”0X” selects base 16. Otherwise, the selected base is 10 // and no prefix is accepted. // // For base 16, lower and upper case letters are considered the same: // The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. // // An underscore character ”_” may appear between a base // prefix and an adjacent digit, and between successive digits; such // underscores do not change the value of the number. // Incorrect placement of underscores is reported as a panic if there // are no other errors. // // If the number is invalid this method leaves z unchanged and returns nil, error. 
func (z *Element) SetString(number string) (*Element, error) { // get temporary big int from the pool vv := pool.BigInt.Get() if _, ok := vv.SetString(number, 0); !ok { return nil, errors.New("Element.SetString failed -> can't parse number into a big.Int " + number) } z.SetBigInt(vv) // release object into pool pool.BigInt.Put(vv) return z, nil } // MarshalJSON returns json encoding of z (z.Text(10)) // If z == nil, returns null func (z *Element) MarshalJSON() ([]byte, error) { if z == nil { return []byte("null"), nil } const maxSafeBound = 15 // we encode it as number if it's small s := z.Text(10) if len(s) <= maxSafeBound { return []byte(s), nil } var sbb strings.Builder sbb.WriteByte('"') sbb.WriteString(s) sbb.WriteByte('"') return []byte(sbb.String()), nil } // UnmarshalJSON accepts numbers and strings as input // See Element.SetString for valid prefixes (0x, 0b, ...) func (z *Element) UnmarshalJSON(data []byte) error { s := string(data) if len(s) > Bits*3 { return errors.New("value too large (max = Element.Bits * 3)") } // we accept numbers and strings, remove leading and trailing quotes if any if len(s) > 0 && s[0] == '"' { s = s[1:] } if len(s) > 0 && s[len(s)-1] == '"' { s = s[:len(s)-1] } // get temporary big int from the pool vv := pool.BigInt.Get() if _, ok := vv.SetString(s, 0); !ok { return errors.New("can't parse into a big.Int: " + s) } z.SetBigInt(vv) // release object into pool pool.BigInt.Put(vv) return nil } // A ByteOrder specifies how to convert byte slices into a Element type ByteOrder interface { Element(*[Bytes]byte) (Element, error) PutElement(*[Bytes]byte, Element) String() string } var errInvalidEncoding = errors.New("invalid fp.Element encoding") // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian type bigEndian struct{} // Element interpret b is a big-endian 48-byte slice. // If b encodes a value higher than q, Element returns error. 
func (bigEndian) Element(b *[Bytes]byte) (Element, error) { var z Element z[0] = binary.BigEndian.Uint64((*b)[40:48]) z[1] = binary.BigEndian.Uint64((*b)[32:40]) z[2] = binary.BigEndian.Uint64((*b)[24:32]) z[3] = binary.BigEndian.Uint64((*b)[16:24]) z[4] = binary.BigEndian.Uint64((*b)[8:16]) z[5] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { return Element{}, errInvalidEncoding } z.toMont() return z, nil } func (bigEndian) PutElement(b *[Bytes]byte, e Element) { e.fromMont() binary.BigEndian.PutUint64((*b)[40:48], e[0]) binary.BigEndian.PutUint64((*b)[32:40], e[1]) binary.BigEndian.PutUint64((*b)[24:32], e[2]) binary.BigEndian.PutUint64((*b)[16:24], e[3]) binary.BigEndian.PutUint64((*b)[8:16], e[4]) binary.BigEndian.PutUint64((*b)[0:8], e[5]) } func (bigEndian) String() string { return "BigEndian" } // LittleEndian is the little-endian implementation of ByteOrder and AppendByteOrder. var LittleEndian littleEndian type littleEndian struct{} func (littleEndian) Element(b *[Bytes]byte) (Element, error) { var z Element z[0] = binary.LittleEndian.Uint64((*b)[0:8]) z[1] = binary.LittleEndian.Uint64((*b)[8:16]) z[2] = binary.LittleEndian.Uint64((*b)[16:24]) z[3] = binary.LittleEndian.Uint64((*b)[24:32]) z[4] = binary.LittleEndian.Uint64((*b)[32:40]) z[5] = binary.LittleEndian.Uint64((*b)[40:48]) if !z.smallerThanModulus() { return Element{}, errInvalidEncoding } z.toMont() return z, nil } func (littleEndian) PutElement(b *[Bytes]byte, e Element) { e.fromMont() binary.LittleEndian.PutUint64((*b)[0:8], e[0]) binary.LittleEndian.PutUint64((*b)[8:16], e[1]) binary.LittleEndian.PutUint64((*b)[16:24], e[2]) binary.LittleEndian.PutUint64((*b)[24:32], e[3]) binary.LittleEndian.PutUint64((*b)[32:40], e[4]) binary.LittleEndian.PutUint64((*b)[40:48], e[5]) } func (littleEndian) String() string { return "LittleEndian" } // Legendre returns the Legendre symbol of z (either +1, -1, or 0.) 
//
// The symbol is accumulated in l while a binary-GCD style loop drives a to zero:
// each outer round runs k-2 inner steps on single-word approximations of a and b,
// then applies the collected update factors to the full-width values.
// If a is already zero on entry the loop never runs, b is still q, and 0 is returned.
func (z *Element) Legendre() int {
	// Adapts "Optimized Binary GCD for Modular Inversion"
	// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf
	// For a faithful implementation of Pornin20 see [Inverse].

	// We don't need to account for z being in Montgomery form.
	// (xR|q) = (x|q)(R|q). R is a square (an even power of 2), so (R|q) = 1.
	a := *z
	b := Element{
		q0,
		q1,
		q2,
		q3,
		q4,
		q5,
	} // b := q

	// Update factors: we get [a; b] ← [f₀ g₀; f₁ g₁] [a; b]
	// cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1)
	var c0, c1 int64

	// s holds a copy of a taken before a is overwritten, so that the update of b
	// can still use the old value.
	var s Element

	l := 1 // loop invariant: (x|q) = (a|b) . l
	// This means that every time a and b are updated into a' and b',
	// l is updated into l' = (x|q)(a'|b')=(x|q)(a|b)(a|b)(a'|b') = l (a|b)(a'|b')
	// During the algorithm's run, there is no guarantee that b remains prime, or even positive.
	// Therefore, we use the properties of the Kronecker symbol, a generalization of the Legendre symbol to all integers.

	for !a.IsZero() {
		n := max(a.BitLen(), b.BitLen())
		aApprox, bApprox := approximateForLegendre(&a, n), approximateForLegendre(&b, n)

		// f₀, g₀, f₁, g₁ = 1, 0, 0, 1
		c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1

		// running fewer iterations because we need access to 3 low bits from b, rather than 1 in the inversion algorithm
		const nbIterations = k - 2
		for range nbIterations {

			if aApprox&1 == 0 {
				aApprox /= 2

				// update the Kronecker symbol
				//
				// (a/2 | b) (2|b) = (a|b)
				//
				// b is either odd or zero, the latter case implying a non-trivial GCD and an ultimate result of 0,
				// regardless of what value l holds.
				// So in updating l, we may assume that b is odd.
				// Since a is even, we only need to correctly compute l if b is odd.
				// if b is also even, the non-trivial GCD will result in the function returning 0 anyway.
				// so we may here assume b is odd.
				// (2|b) = 1 if b ≡ 1 or 7 (mod 8), and -1 if b ≡ 3 or 5 (mod 8)
				if bMod8 := bApprox & 7; bMod8 == 3 || bMod8 == 5 {
					l = -l
				}
			} else {
				// this s is a fresh uint64 local that shadows the outer Element s
				s, borrow := bits.Sub64(aApprox, bApprox, 0)
				if borrow == 1 {
					// Compute (b-a|a)
					// (x-y|z) = (x|z) unless z < 0 and sign(x-y) ≠ sign(x)
					// Pornin20 asserts that at least one of a and b is non-negative.
					// If a is non-negative, we immediately get (b-a|a) = (b|a)
					// If a is negative, b-a > b. But b is already non-negative, so the b-a and b have the same sign.
					// Thus in that case also (b-a|a) = (b|a)
					// Since not both a and b are negative, we get a quadratic reciprocity law
					// like that of the Legendre symbol: (b|a) = (a|b), unless a, b ≡ 3 (mod 4), in which case (b|a) = -(a|b)
					if bApprox&3 == 3 && aApprox&3 == 3 {
						l = -l
					}

					s = bApprox - aApprox
					bApprox = aApprox
					c0, c1 = c1, c0
				}

				aApprox = s / 2
				c0 = c0 - c1

				// update l to reflect halving a, just like in the case where a is even
				if bMod8 := bApprox & 7; bMod8 == 3 || bMod8 == 5 {
					l = -l
				}
			}

			c1 *= 2
		}

		s = a

		var g0 int64
		// from this point on c0 aliases for f0
		c0, g0 = updateFactorsDecompose(c0)
		aHi := a.linearCombNonModular(&s, c0, &b, g0)
		if aHi&signBitSelector != 0 {
			// if aHi < 0
			aHi = negL(&a, aHi)
			// Since a is negative, b is not and hence b ≠ -1
			// So we get (-a|b)=(-1|b)(a|b)
			// b is odd so we get (-1|b) = 1 if b ≡ 1 (mod 4) and -1 otherwise.
			if bApprox&3 == 3 { // we still have two valid lower bits for b
				l = -l
			}
		}
		// right-shift a by k-2 bits; aHi supplies the bits entering the top limb
		a[0] = (a[0] >> nbIterations) | ((a[1]) << (2*k - nbIterations))
		a[1] = (a[1] >> nbIterations) | ((a[2]) << (2*k - nbIterations))
		a[2] = (a[2] >> nbIterations) | ((a[3]) << (2*k - nbIterations))
		a[3] = (a[3] >> nbIterations) | ((a[4]) << (2*k - nbIterations))
		a[4] = (a[4] >> nbIterations) | ((a[5]) << (2*k - nbIterations))
		a[5] = (a[5] >> nbIterations) | (aHi << (2*k - nbIterations))

		var f1 int64
		// from this point on c1 aliases for g1
		f1, c1 = updateFactorsDecompose(c1)
		bHi := b.linearCombNonModular(&s, f1, &b, c1)
		if bHi&signBitSelector != 0 {
			// if bHi < 0
			bHi = negL(&b, bHi)
			// no need to update l, since we know a ≥ 0
			// (a|-1) = 1 if a ≥ 0
		}
		// right-shift b by k-2 bits; bHi supplies the bits entering the top limb
		b[0] = (b[0] >> nbIterations) | ((b[1]) << (2*k - nbIterations))
		b[1] = (b[1] >> nbIterations) | ((b[2]) << (2*k - nbIterations))
		b[2] = (b[2] >> nbIterations) | ((b[3]) << (2*k - nbIterations))
		b[3] = (b[3] >> nbIterations) | ((b[4]) << (2*k - nbIterations))
		b[4] = (b[4] >> nbIterations) | ((b[5]) << (2*k - nbIterations))
		b[5] = (b[5] >> nbIterations) | (bHi << (2*k - nbIterations))
	}

	// a is now zero; the answer is l only when b ended up equal to 1
	if b[0] == 1 && (b[1]|b[2]|b[3]|b[4]|b[5]) == 0 {
		return l // (0|1) = 1
	} else {
		return 0 // b ≠ 1 means gcd(z, q) ≠ 1, in which case (z|q) = 0
	}
}

// approximate a big number x into a single 64 bit word using its uppermost and lowermost bits.
// If x fits in a word as is, no approximation necessary.
// This differs from the standard approximate function in that in the Legendre symbol computation
// we need to access the 3 low bits of b, rather than just one. So lo ≥ n+2 where n is the number of inner iterations.
// The requirement on the high bits is unchanged, hi ≥ n+1.
// Thus we hit a maximum of hi = lo = k and n = k-2 as opposed to n = lo = k-1 and hi = k+1 in the standard approximate function.
// Since we are doing fewer iterations than in the inversion algorithm, all the arguments on bounds for update factors remain valid.
//
// nBits is the bit length to cover; when it is at most 64 the low word x[0] is returned as is.
// Otherwise the result is assembled from the k low bits of x[0], up to k of the highest
// significant bits of the top word, and bits of the word just below it filling the gap.
func approximateForLegendre(x *Element, nBits int) uint64 {
	if nBits <= 64 {
		return x[0]
	}
	const mask = (uint64(1) << k) - 1 // k ones
	lo := mask & x[0]

	// index of the word holding the most significant bit
	hiWordIndex := (nBits - 1) / 64

	hiWordBitsAvailable := nBits - hiWordIndex*64
	hiWordBitsUsed := min(hiWordBitsAvailable, k)

	// drop the low bits of the top word that do not fit, then move the rest to the top of the result
	mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1))
	hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable)

	// take the highest bits of the next word down to fill positions between lo and hi
	mask_ = ^(1<<(k+hiWordBitsUsed) - 1)
	mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed

	return lo | mid | hi
}

// Sqrt z = √x (mod q)
// if the square root doesn't exist (x is not a square mod q)
// Sqrt leaves z unchanged and returns nil
func (z *Element) Sqrt(x *Element) *Element {
	// q ≡ 3 (mod 4)
	// using z ≡ ± x^((q+1)/4) (mod q)
	var y, square Element
	// y is the candidate root; x is passed by value, z is not touched yet
	y.expBySqrtExp(*x)
	// as we didn't compute the legendre symbol, ensure we found y such that y * y = x
	square.Square(&y)
	if square.Equal(x) {
		return z.Set(&y)
	}
	return nil
}

// Parameters shared by the approximation helpers, Inverse and Legendre.
const (
	k               = 32 // word size / 2
	signBitSelector = uint64(1) << 63 // top bit of a word, used as the sign bit of a high word
	approxLowBitsN  = k - 1           // low bits kept by approximate; also the inner step count in Inverse
	approxHighBitsN = k + 1           // maximum number of high bits kept by approximate
)

// inversionCorrectionFactorWord0..5 are the limbs of the element Inverse multiplies v by
// at the end; invIterationsN is the outer iteration count that Inverse pads up to.
const (
	inversionCorrectionFactorWord0 = 8737414717120368535
	inversionCorrectionFactorWord1 = 10094300570241649429
	inversionCorrectionFactorWord2 = 6339946188669102923
	inversionCorrectionFactorWord3 = 10492640117780001228
	inversionCorrectionFactorWord4 = 12201317704601795701
	inversionCorrectionFactorWord5 = 1158882751927031822
	invIterationsN                 = 26
)

// Inverse z = x⁻¹ (mod q)
//
// if x == 0, sets and returns z = x
//
// The result of the binary-GCD computation is verified by multiplying it with x;
// if the product is not one (and x is not zero) the method falls back to inverseExp.
func (z *Element) Inverse(x *Element) *Element {
	// Implements "Optimized Binary GCD for Modular Inversion"
	// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf

	a := *x
	b := Element{
		q0,
		q1,
		q2,
		q3,
		q4,
		q5,
	} // b := q

	u := Element{1}

	// Update factors: we get [u; v] ← [f₀ g₀; f₁ g₁] [u; v]
	// cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1)
	var c0, c1 int64

	// Saved update factors to reduce the number of field multiplications
	var pf0, pf1, pg0, pg1 int64

	var i uint

	var v, s Element

	// Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations
	// This also lets us get away with half as many updates to u,v
	// To make this constant-time-ish, replace the condition with i < invIterationsN
	for i = 0; i&1 == 1 || !a.IsZero(); i++ {
		n := max(a.BitLen(), b.BitLen())
		aApprox, bApprox := approximate(&a, n), approximate(&b, n)

		// f₀, g₀, f₁, g₁ = 1, 0, 0, 1
		c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1

		for j := 0; j < approxLowBitsN; j++ {

			// -2ʲ < f₀, f₁ ≤ 2ʲ
			// |f₀| + |f₁| < 2ʲ⁺¹

			if aApprox&1 == 0 {
				aApprox /= 2
			} else {
				// this s is a fresh uint64 local that shadows the outer Element s
				s, borrow := bits.Sub64(aApprox, bApprox, 0)
				if borrow == 1 {
					s = bApprox - aApprox
					bApprox = aApprox
					c0, c1 = c1, c0
					// invariants unchanged
				}

				aApprox = s / 2
				c0 = c0 - c1

				// Now |f₀| < 2ʲ⁺¹ ≤ 2ʲ⁺¹ (only the weaker inequality is needed, strictly speaking)
				// Started with f₀ > -2ʲ and f₁ ≤ 2ʲ, so f₀ - f₁ > -2ʲ⁺¹
				// Invariants unchanged for f₁
			}

			c1 *= 2
			// -2ʲ⁺¹ < f₁ ≤ 2ʲ⁺¹
			// So now |f₀| + |f₁| < 2ʲ⁺²
		}

		// keep the old a: it is still needed to update b below
		s = a

		var g0 int64
		// from this point on c0 aliases for f0
		c0, g0 = updateFactorsDecompose(c0)
		aHi := a.linearCombNonModular(&s, c0, &b, g0)
		if aHi&signBitSelector != 0 {
			// if aHi < 0
			c0, g0 = -c0, -g0
			aHi = negL(&a, aHi)
		}
		// right-shift a by k-1 bits
		a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN)
		a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN)
		a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN)
		a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN)
		a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN)
		a[5] = (a[5] >> approxLowBitsN) | (aHi << approxHighBitsN)

		var f1 int64
		// from this point on c1 aliases for g1
		f1, c1 = updateFactorsDecompose(c1)
		bHi := b.linearCombNonModular(&s, f1, &b, c1)
		if bHi&signBitSelector != 0 {
			// if bHi < 0
			f1, c1 = -f1, -c1
			bHi = negL(&b, bHi)
		}
		// right-shift b by k-1 bits
		b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN)
		b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN)
		b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN)
		b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN)
		b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN)
		b[5] = (b[5] >> approxLowBitsN) | (bHi << approxHighBitsN)

		if i&1 == 1 {
			// Combine current update factors with previously stored ones
			// [F₀, G₀; F₁, G₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁], with capital letters denoting new combined values
			// We get |F₀| = | f₀pf₀ + g₀pf₁ | ≤ |f₀pf₀| + |g₀pf₁| = |f₀| |pf₀| + |g₀| |pf₁| ≤ 2ᵏ⁻¹|pf₀| + 2ᵏ⁻¹|pf₁|
			// = 2ᵏ⁻¹ (|pf₀| + |pf₁|) < 2ᵏ⁻¹ 2ᵏ = 2²ᵏ⁻¹
			// So |F₀| < 2²ᵏ⁻¹ meaning it fits in a 2k-bit signed register

			// c₀ aliases f₀, c₁ aliases g₁
			c0, g0, f1, c1 = c0*pf0+g0*pf1,
				c0*pg0+g0*pg1,
				f1*pf0+c1*pf1,
				f1*pg0+c1*pg1

			// keep the old u: it is still needed to update v below
			s = u

			// 0 ≤ u, v < 2²⁵⁵
			// NOTE(review): the bound above reads like a leftover from a 255-bit field;
			// linearComb documents 0 ≤ x, y < 2³⁸¹ for this 381-bit modulus — confirm.
			// |F₀|, |G₀| < 2⁶³
			u.linearComb(&u, c0, &v, g0)
			// |F₁|, |G₁| < 2⁶³
			v.linearComb(&s, f1, &v, c1)

		} else {
			// Save update factors
			pf0, pg0, pf1, pg1 = c0, g0, f1, c1
		}
	}

	// For every iteration that we miss, v is not being multiplied by 2ᵏ⁻²
	// NOTE(review): pSq below is 2^(2(k-1)) and the loop advances i by 2, i.e. a factor
	// of 2ᵏ⁻¹ per missed iteration; the exponent in the sentence above looks off by one — confirm.
	const pSq uint64 = 1 << (2 * (k - 1))
	a = Element{pSq}
	// If the function is constant-time ish, this loop will not run (no need to take it out explicitly)
	for ; i < invIterationsN; i += 2 {
		// could optimize further with mul by word routine or by pre-computing a table since with k=26,
		// we would multiply by pSq up to 13times;
		// (NOTE(review): 26 is invIterationsN here, k is 32)
		// on x86, the assembly routine outperforms generic code for mul by word
		// on arm64, we may loose up to ~5% for 6 limbs
		v.Mul(&v, &a)
	}

	u.Set(x) // for correctness check

	z.Mul(&v, &Element{
		inversionCorrectionFactorWord0,
		inversionCorrectionFactorWord1,
		inversionCorrectionFactorWord2,
		inversionCorrectionFactorWord3,
		inversionCorrectionFactorWord4,
		inversionCorrectionFactorWord5,
	})

	// correctness check: x * z must be one unless x is zero; otherwise recompute by exponentiation
	v.Mul(&u, z)
	if !v.IsOne() && !u.IsZero() {
		return z.inverseExp(u)
	}

	return z
}

// inverseExp computes z = x⁻¹ (mod q) =
x**(q-2) (mod q) func (z *Element) inverseExp(x Element) *Element { // e == q-2 e := Modulus() e.Sub(e, big.NewInt(2)) z.Set(&x) for i := e.BitLen() - 2; i >= 0; i-- { z.Square(z) if e.Bit(i) == 1 { z.Mul(z, &x) } } return z } // approximate a big number x into a single 64 bit word using its uppermost and lowermost bits // if x fits in a word as is, no approximation necessary func approximate(x *Element, nBits int) uint64 { if nBits <= 64 { return x[0] } const mask = (uint64(1) << approxLowBitsN) - 1 // k-1 ones lo := mask & x[0] hiWordIndex := (nBits - 1) / 64 hiWordBitsAvailable := nBits - hiWordIndex*64 hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed return lo | mid | hi } // linearComb z = xC * x + yC * y; // 0 ≤ x, y < 2³⁸¹ // |xC|, |yC| < 2⁶³ func (z *Element) linearComb(x *Element, xC int64, y *Element, yC int64) { // | (hi, z) | < 2 * 2⁶³ * 2³⁸¹ = 2⁴⁴⁵ // therefore | hi | < 2⁶¹ ≤ 2⁶³ hi := z.linearCombNonModular(x, xC, y, yC) z.montReduceSigned(z, hi) } // montReduceSigned z = (xHi * r + x) * r⁻¹ using the SOS algorithm // Requires |xHi| < 2⁶³. Most significant bit of xHi is the sign bit. 
func (z *Element) montReduceSigned(x *Element, xHi uint64) { const signBitRemover = ^signBitSelector mustNeg := xHi&signBitSelector != 0 // the SOS implementation requires that most significant bit is 0 // Let X be xHi*r + x // If X is negative we would have initially stored it as 2⁶⁴ r + X (à la 2's complement) xHi &= signBitRemover // with this a negative X is now represented as 2⁶³ r + X var t [2*Limbs - 1]uint64 var C uint64 m := x[0] * qInvNeg C = madd0(m, q0, x[0]) C, t[1] = madd2(m, q1, x[1], C) C, t[2] = madd2(m, q2, x[2], C) C, t[3] = madd2(m, q3, x[3], C) C, t[4] = madd2(m, q4, x[4], C) C, t[5] = madd2(m, q5, x[5], C) // m * qElement[5] ≤ (2⁶⁴ - 1) * (2⁶³ - 1) = 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 // x[5] + C ≤ 2*(2⁶⁴ - 1) = 2⁶⁵ - 2 // On LHS, (C, t[5]) ≤ 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 + 2⁶⁵ - 2 = 2¹²⁷ + 2⁶³ - 1 // So on LHS, C ≤ 2⁶³ t[6] = xHi + C // xHi + C < 2⁶³ + 2⁶³ = 2⁶⁴ // { const i = 1 m = t[i] * qInvNeg C = madd0(m, q0, t[i+0]) C, t[i+1] = madd2(m, q1, t[i+1], C) C, t[i+2] = madd2(m, q2, t[i+2], C) C, t[i+3] = madd2(m, q3, t[i+3], C) C, t[i+4] = madd2(m, q4, t[i+4], C) C, t[i+5] = madd2(m, q5, t[i+5], C) t[i+Limbs] += C } { const i = 2 m = t[i] * qInvNeg C = madd0(m, q0, t[i+0]) C, t[i+1] = madd2(m, q1, t[i+1], C) C, t[i+2] = madd2(m, q2, t[i+2], C) C, t[i+3] = madd2(m, q3, t[i+3], C) C, t[i+4] = madd2(m, q4, t[i+4], C) C, t[i+5] = madd2(m, q5, t[i+5], C) t[i+Limbs] += C } { const i = 3 m = t[i] * qInvNeg C = madd0(m, q0, t[i+0]) C, t[i+1] = madd2(m, q1, t[i+1], C) C, t[i+2] = madd2(m, q2, t[i+2], C) C, t[i+3] = madd2(m, q3, t[i+3], C) C, t[i+4] = madd2(m, q4, t[i+4], C) C, t[i+5] = madd2(m, q5, t[i+5], C) t[i+Limbs] += C } { const i = 4 m = t[i] * qInvNeg C = madd0(m, q0, t[i+0]) C, t[i+1] = madd2(m, q1, t[i+1], C) C, t[i+2] = madd2(m, q2, t[i+2], C) C, t[i+3] = madd2(m, q3, t[i+3], C) C, t[i+4] = madd2(m, q4, t[i+4], C) C, t[i+5] = madd2(m, q5, t[i+5], C) t[i+Limbs] += C } { const i = 5 m := t[i] * qInvNeg C = madd0(m, q0, t[i+0]) C, z[0] = madd2(m, q1, t[i+1], C) C, 
z[1] = madd2(m, q2, t[i+2], C) C, z[2] = madd2(m, q3, t[i+3], C) C, z[3] = madd2(m, q4, t[i+4], C) z[5], z[4] = madd2(m, q5, t[i+5], C) } // if z ⩾ q → z -= q if !z.smallerThanModulus() { var b uint64 z[0], b = bits.Sub64(z[0], q0, 0) z[1], b = bits.Sub64(z[1], q1, b) z[2], b = bits.Sub64(z[2], q2, b) z[3], b = bits.Sub64(z[3], q3, b) z[4], b = bits.Sub64(z[4], q4, b) z[5], _ = bits.Sub64(z[5], q5, b) } // if mustNeg { // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead var b uint64 z[0], b = bits.Sub64(z[0], signBitSelector, 0) z[1], b = bits.Sub64(z[1], 0, b) z[2], b = bits.Sub64(z[2], 0, b) z[3], b = bits.Sub64(z[3], 0, b) z[4], b = bits.Sub64(z[4], 0, b) z[5], b = bits.Sub64(z[5], 0, b) // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 if b != 0 { // z[5] = -1 // negative: add q const neg1 = 0xFFFFFFFFFFFFFFFF var carry uint64 z[0], carry = bits.Add64(z[0], q0, 0) z[1], carry = bits.Add64(z[1], q1, carry) z[2], carry = bits.Add64(z[2], q2, carry) z[3], carry = bits.Add64(z[3], q3, carry) z[4], carry = bits.Add64(z[4], q4, carry) z[5], _ = bits.Add64(neg1, q5, carry) } } } const ( updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) updateFactorIdentityMatrixRow0 = 1 updateFactorIdentityMatrixRow1 = 1 << 32 ) func updateFactorsDecompose(c int64) (int64, int64) { c += updateFactorsConversionBias const low32BitsFilter int64 = 0xFFFFFFFF f := c&low32BitsFilter - 0x7FFFFFFF g := c>>32&low32BitsFilter - 0x7FFFFFFF return f, g } // negL negates in place [x | xHi] and return the new most significant word xHi func negL(x *Element, xHi uint64) uint64 { var b uint64 x[0], b = bits.Sub64(0, x[0], 0) x[1], b = bits.Sub64(0, x[1], b) x[2], b = bits.Sub64(0, x[2], b) x[3], b = bits.Sub64(0, x[3], b) x[4], b = bits.Sub64(0, x[4], b) x[5], b = bits.Sub64(0, x[5], b) xHi, _ = bits.Sub64(0, xHi, b) return xHi } // mulWNonModular multiplies by one word in non-montgomery, without reducing func (z *Element) mulWNonModular(x *Element, y 
int64) uint64 { // w := abs(y) m := y >> 63 w := uint64((y ^ m) - m) var c uint64 c, z[0] = bits.Mul64(x[0], w) c, z[1] = madd1(x[1], w, c) c, z[2] = madd1(x[2], w, c) c, z[3] = madd1(x[3], w, c) c, z[4] = madd1(x[4], w, c) c, z[5] = madd1(x[5], w, c) if y < 0 { c = negL(z, c) } return c } // linearCombNonModular computes a linear combination without modular reduction func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { var yTimes Element yHi := yTimes.mulWNonModular(y, yC) xHi := z.mulWNonModular(x, xC) var carry uint64 z[0], carry = bits.Add64(z[0], yTimes[0], 0) z[1], carry = bits.Add64(z[1], yTimes[1], carry) z[2], carry = bits.Add64(z[2], yTimes[2], carry) z[3], carry = bits.Add64(z[3], yTimes[3], carry) z[4], carry = bits.Add64(z[4], yTimes[4], carry) z[5], carry = bits.Add64(z[5], yTimes[5], carry) yHi, _ = bits.Add64(xHi, yHi, carry) return yHi }