package fp

import "math/big"

// The following code is _almost_ the original code from:
// https://github.com/GottfriedHerold/Bandersnatch/blob/f665f90b64892b9c4c89cff3219e70456bb431e5/bandersnatch/fieldElements/field_element_square_root.go
//
// We had to do some changes to make it work with gnark:
// - The type `feType_SquareRoot` was aliased to `Element` so everything looks the same. These types didn't have the exact
//   same underlying representation, so it led to some minor adjustments. (e.g: accessing the limbs)
// - Original APIs regarding finite-field multiplications (e.g: MulEq) were adjusted to use gnark Mul APIs.
// - The original code had to explicitly do `Normalize()` after field element operations, but this isn't needed in gnark.
// - The primitive 2^32-th root of unity value (see init()) was pulled from gnark FFT domain code.
// - The original code used anonymous functions to define global vars, but we changed it to use an init() function.
//   This was required since we have other init() in the package that configure other globals (e.g: _modulus).
//   Because of the way init() functions execution order works, we'll have these configured before the sqrt init() is called,
//   compared with the original anonymous function global calls.

// feType_SquareRoot is an alias of Element, kept so the code below reads like the original implementation.
type feType_SquareRoot = Element

const (
	// BaseField2Adicity is the exponent S in (p-1) = Q * 2^S with Q odd.
	BaseField2Adicity = 32

	sqrtParam_TotalBits            = BaseField2Adicity // (p-1) = Q * 2^S with Q odd, leads to S = 32.
	sqrtParam_BlockSize            = 8                 // 8 bit window per chunk
	sqrtParam_Blocks               = sqrtParam_TotalBits / sqrtParam_BlockSize
	sqrtParam_FirstBlockUnusedBits = sqrtParam_Blocks*sqrtParam_BlockSize - sqrtParam_TotalBits // number of unused bits in the first reconstructed block.
	sqrtParam_BitMask              = (1 << sqrtParam_BlockSize) - 1                             // bitmask to pick up the last sqrtParam_BlockSize bits.
)

// NOTE: These "variables" are actually pre-computed constants that must not change.
var ( // sqrtPrecomp_PrimitiveDyadicRoots[i] equals DyadicRootOfUnity^(2^i) for 0 <= i <= 32 // // This means that it is a 32-i'th primitive root of unitity, obtained by repeatedly squaring a 2^32th primitive root of unity [DyadicRootOfUnity_fe]. sqrtPrecomp_PrimitiveDyadicRoots [BaseField2Adicity + 1]feType_SquareRoot // primitive root of unity of order 2^sqrtParam_BlockSize sqrtPrecomp_ReconstructionDyadicRoot feType_SquareRoot // sqrtPrecomp_dlogLUT is a lookup table used to implement the map sqrtPrecompt_reconstructionDyadicRoot^a -> -a sqrtPrecomp_dlogLUT map[uint16]uint ) func init() { sqrtPrecomp_PrimitiveDyadicRoots = func() (ret [BaseField2Adicity + 1]feType_SquareRoot) { if _, err := ret[0].SetString("10238227357739495823651030575849232062558860180284477541189508159991286009131"); err != nil { panic(err) } for i := 1; i <= BaseField2Adicity; i++ { // Note <= here ret[i].Square(&ret[i-1]) } // 31th one must be -1. We check that here. x := big.NewInt(0) ret[BaseField2Adicity-1].BigInt(x) if ret[BaseField2Adicity-1].String() != "-1" { panic("something is wrong with the dyadic roots of unity") } return }() // immediately invoked lambda sqrtPrecomp_ReconstructionDyadicRoot = sqrtPrecomp_PrimitiveDyadicRoots[BaseField2Adicity-sqrtParam_BlockSize] sqrtPrecomp_PrecomputedBlocks = func() (blocks [sqrtParam_Blocks][1 << sqrtParam_BlockSize]feType_SquareRoot) { for i := 0; i < sqrtParam_Blocks; i++ { blocks[i][0].SetOne() for j := 1; j < (1 << sqrtParam_BlockSize); j++ { blocks[i][j].Mul(&blocks[i][j-1], &sqrtPrecomp_PrimitiveDyadicRoots[i*sqrtParam_BlockSize]) } } return }() // immediately invoked lambda sqrtPrecomp_dlogLUT = func() (ret map[uint16]uint) { const LUTSize = 1 << sqrtParam_BlockSize // 256 ret = make(map[uint16]uint, LUTSize) var rootOfUnity feType_SquareRoot rootOfUnity.SetOne() for i := 0; i < LUTSize; i++ { const mask = LUTSize - 1 // the LUTSize many roots of unity all (by chance) have distinct values for .words[0]&0xFFFF. 
Note that this uses the Montgomery representation. ret[uint16(rootOfUnity[0]&0xFFFF)] = uint((-i) & mask) rootOfUnity.Mul(&rootOfUnity, &sqrtPrecomp_ReconstructionDyadicRoot) } // This effectively checks the above claim (that .words[0]&0xFFFF is distinct). // Note that this might fail if we adjust the sqrtParam_BlockSize parameter and this check will alert us. if len(ret) != LUTSize { panic("failed to store all appropriate roots of unity in a map") } return }() // immediately invoked lambda } // sqrtAlg_NegDlogInSmallDyadicSubgroup takes a (not necessarily primitive) root of unity x of order 2^sqrtParam_BlockSize. // x has the form sqrtPrecomp_ReconstructionDyadicRoot^a and returns its negative dlog -a. // // The returned value is only meaningful modulo 1<>= sqrtParam_FirstBlockUnusedBits // if the exponent we just got is odd, there is no square root, no point in determining the other bits. if negExponent&1 == 1 { return false } // Get remaining bits for i := 1; i < sqrtParam_Blocks; i++ { temp2 = powers[sqrtParam_Blocks-1-i] // We essentially un-set the bits we already know from powers[_sqrtNumBlocks-1-i] for j := 0; j < i; j++ { sqrtAlg_GetPrecomputedRootOfUnity(&temp, int((negExponent>>(j*sqrtParam_BlockSize))&sqrtParam_BitMask), uint(j+sqrtParam_Blocks-1-i)) temp2.Mul(&temp2, &temp) } newBits := sqrtAlg_NegDlogInSmallDyadicSubgroup(&temp2) negExponent |= newBits << (sqrtParam_BlockSize*i - sqrtParam_FirstBlockUnusedBits) } // var tmp _FESquareRoot // negExponent is now the negative dlog of z. 
// Take the square root negExponent >>= 1 // Write to z: z.SetOne() for i := 0; i < sqrtParam_Blocks; i++ { sqrtAlg_GetPrecomputedRootOfUnity(&temp, int((negExponent>>(i*sqrtParam_BlockSize))&sqrtParam_BitMask), uint(i)) z.Mul(z, &temp) } return true }
// NOTE(review): the line above is kept verbatim from the source as received. It is the tail of a function whose beginning is not intact in this view, so it was not reconstructed.

// sqrtAlg_ComputeRelevantPowers raises z to two fixed exponents using a hand-written addition chain.
//
// With Q = BaseFieldMultiplicativeOddOrder (as named in the trailing comments), on return:
//   - squareRootCandidate holds z^((Q+1)/2)
//   - rootOfUnity holds z^Q
//
// z is only read. rootOfUnity is written before z is read for the last time, so neither output
// may alias z, and the two outputs must be distinct from each other.
//
// The binary literals in the trailing comments track the exponent of z currently held in acc.
func sqrtAlg_ComputeRelevantPowers(z *Element, squareRootCandidate *feType_SquareRoot, rootOfUnity *feType_SquareRoot) {
	// SquareEqNTimes squares its argument in place n times, i.e. raises it to the power 2^n.
	SquareEqNTimes := func(z *feType_SquareRoot, n int) {
		for i := 0; i < n; i++ {
			z.Square(z)
		}
	}

	// hand-crafted sliding window-type algorithm with window-size 5
	// Note that we precompute and use z^255 multiple times (even though it's not size 5)
	// and some windows actually overlap(!)
	var z2, z3, z7, z6, z9, z11, z13, z19, z21, z25, z27, z29, z31, z255 feType_SquareRoot
	var acc feType_SquareRoot

	// Small powers of z used as window values.
	z2.Square(z) // 0b10
	z3.Mul(z, &z2) // 0b11
	z6.Square(&z3) // 0b110
	z7.Mul(z, &z6) // 0b111
	z9.Mul(&z7, &z2) // 0b1001
	z11.Mul(&z9, &z2) // 0b1011
	z13.Mul(&z11, &z2) // 0b1101
	z19.Mul(&z13, &z6) // 0b10011
	z21.Mul(&z2, &z19) // 0b10101
	z25.Mul(&z19, &z6) // 0b11001
	z27.Mul(&z25, &z2) // 0b11011
	z29.Mul(&z27, &z2) // 0b11101
	z31.Mul(&z29, &z2) // 0b11111

	// Main chain; z^255 is captured on the way.
	acc.Mul(&z27, &z29) // 56
	acc.Square(&acc) // 112
	acc.Square(&acc) // 224
	z255.Mul(&acc, &z31) // 0b11111111 = 255
	acc.Square(&acc) // 448
	acc.Square(&acc) // 896
	acc.Mul(&acc, &z31) // 0b1110011111 = 927
	SquareEqNTimes(&acc, 6) // 0b1110011111000000
	acc.Mul(&acc, &z27) // 0b1110011111011011
	SquareEqNTimes(&acc, 6) // 0b1110011111011011000000
	acc.Mul(&acc, &z19) // 0b1110011111011011010011
	SquareEqNTimes(&acc, 5) // 0b111001111101101101001100000
	acc.Mul(&acc, &z21) // 0b111001111101101101001110101
	SquareEqNTimes(&acc, 7) // 0b1110011111011011010011101010000000
	acc.Mul(&acc, &z25) // 0b1110011111011011010011101010011001
	SquareEqNTimes(&acc, 6) // 0b1110011111011011010011101010011001000000
	acc.Mul(&acc, &z19) // 0b1110011111011011010011101010011001010011
	SquareEqNTimes(&acc, 5) // 0b111001111101101101001110101001100101001100000
	acc.Mul(&acc, &z7) // 0b111001111101101101001110101001100101001100111
	SquareEqNTimes(&acc, 5) // 0b11100111110110110100111010100110010100110011100000
	acc.Mul(&acc, &z11) // 0b11100111110110110100111010100110010100110011101011
	SquareEqNTimes(&acc, 5) // 0b1110011111011011010011101010011001010011001110101100000
	acc.Mul(&acc, &z29) // 0b1110011111011011010011101010011001010011001110101111101
	SquareEqNTimes(&acc, 5) // 0b111001111101101101001110101001100101001100111010111110100000
	acc.Mul(&acc, &z9) // 0b111001111101101101001110101001100101001100111010111110101001
	SquareEqNTimes(&acc, 7) // 0b1110011111011011010011101010011001010011001110101111101010010000000
	acc.Mul(&acc, &z3) // 0b1110011111011011010011101010011001010011001110101111101010010000011
	SquareEqNTimes(&acc, 7) // 0b11100111110110110100111010100110010100110011101011111010100100000110000000
	acc.Mul(&acc, &z25) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001
	SquareEqNTimes(&acc, 5) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100100000
	acc.Mul(&acc, &z25) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001
	SquareEqNTimes(&acc, 5) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100100000
	acc.Mul(&acc, &z27) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011
	SquareEqNTimes(&acc, 8) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000000
	acc.Mul(&acc, z) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001
	SquareEqNTimes(&acc, 8) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000000
	acc.Mul(&acc, z) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001
	SquareEqNTimes(&acc, 6) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001000000
	acc.Mul(&acc, &z13) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001001101
	SquareEqNTimes(&acc, 7) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000000
	acc.Mul(&acc, &z7) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111
	SquareEqNTimes(&acc, 3) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111000
	acc.Mul(&acc, &z3) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011
	SquareEqNTimes(&acc, 13) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000000000
	acc.Mul(&acc, &z21) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101
	SquareEqNTimes(&acc, 5) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010100000
	acc.Mul(&acc, &z9) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010101001
	SquareEqNTimes(&acc, 5) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001001101000011101100000000101010100100000
	acc.Mul(&acc, &z27) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001001101000011101100000000101010100111011
	SquareEqNTimes(&acc, 5) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101100000
	acc.Mul(&acc, &z27) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011
	SquareEqNTimes(&acc, 5) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010101001110111101100000
	acc.Mul(&acc, &z9) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010101001110111101101001
	SquareEqNTimes(&acc, 10) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000000
	acc.Mul(&acc, z) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000001
	SquareEqNTimes(&acc, 7) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001001101000011101100000000101010100111011110110100100000000010000000
	acc.Mul(&acc, &z255) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001001101000011101100000000101010100111011110110100100000000101111111
	SquareEqNTimes(&acc, 8) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111100000000
	acc.Mul(&acc, &z255) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111
	SquareEqNTimes(&acc, 6) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111000000
	acc.Mul(&acc, &z11) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111001011
	SquareEqNTimes(&acc, 9) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111001011000000000
	acc.Mul(&acc, &z255) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111001011011111111
	SquareEqNTimes(&acc, 2) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010101001110111101101001000000001011111111111111100101101111111100
	acc.Mul(&acc, z) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010101001110111101101001000000001011111111111111100101101111111101
	SquareEqNTimes(&acc, 7) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111001011011111111010000000
	acc.Mul(&acc, &z255) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111001011011111111101111111
	SquareEqNTimes(&acc, 8) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010101001110111101101001000000001011111111111111100101101111111110111111100000000
	acc.Mul(&acc, &z255) // 0b11100111110110110100111010100110010100110011101011111010100100000110011001110011101100000001000000010011010000111011000000001010101001110111101101001000000001011111111111111100101101111111110111111111111111
	SquareEqNTimes(&acc, 8) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001001101000011101100000000101010100111011110110100100000000101111111111111110010110111111111011111111111111100000000
	acc.Mul(&acc, &z255) // 0b1110011111011011010011101010011001010011001110101111101010010000011001100111001110110000000100000001001101000011101100000000101010100111011110110100100000000101111111111111110010110111111111011111111111111111111111
	SquareEqNTimes(&acc, 8) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111001011011111111101111111111111111111111100000000
	acc.Mul(&acc, &z255) // 0b111001111101101101001110101001100101001100111010111110101001000001100110011100111011000000010000000100110100001110110000000010101010011101111011010010000000010111111111111111001011011111111101111111111111111111111111111111

	// acc is now z^((BaseFieldMultiplicativeOddOrder - 1)/2)
	// Both outputs are derived from acc with one extra multiplication by z each.
	rootOfUnity.Square(&acc) // BaseFieldMultiplicativeOddOrder - 1
	rootOfUnity.Mul(rootOfUnity, z) // BaseFieldMultiplicativeOddOrder
	squareRootCandidate.Mul(&acc, z) // (BaseFieldMultiplicativeOddOrder + 1)/2
}