1286 lines
42 KiB
Go
1286 lines
42 KiB
Go
// Copyright 2020 ConsenSys Software Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
|
//
|
|
// This code has been edited to be suitable for inner curves
|
|
|
|
package bandersnatch
|
|
|
|
import (
|
|
"errors"
|
|
"math"
|
|
"runtime"
|
|
"sync"
|
|
|
|
"github.com/crate-crypto/go-ipa/bandersnatch/fr"
|
|
"github.com/crate-crypto/go-ipa/common/parallel"
|
|
)
|
|
|
|
// MultiExpConfig enables to set optional configuration attribute to a call to MultiExp.
type MultiExpConfig struct {
	NbTasks     int  // go routines to be used in the multiexp. can be larger than num cpus. Defaults to runtime.NumCPU() when <= 0.
	ScalarsMont bool // indicates if the scalars are in montgomery form. Default to false.
}
|
|
|
|
// selector stores the index, mask and shifts needed to select bits from a scalar.
// It is used during the multiExp algorithm or the batch scalar multiplication to
// extract one c-bit window (digit) from a multi-word scalar.
type selector struct {
	index uint64 // index in the multi-word scalar to select bits from
	mask  uint64 // mask (c-bit wide)
	shift uint64 // shift needed to get our bits on low positions

	multiWordSelect bool   // set to true if we need to select bits from 2 words (case where c doesn't divide 64)
	maskHigh        uint64 // same as mask, for index+1
	shiftHigh       uint64 // same as shift, for index+1
}
|
|
|
|
// partitionScalars computes, for each scalar over c-bit wide windows, nbChunk digits.
// If a digit is larger than 2^{c-1}, then we borrow 2^c from the next window and subtract
// 2^{c} from the current digit, making it negative.
// Negative digits can be processed in a later step as adding -G into the bucket instead of G
// (computing -G is cheap, and this saves us half of the buckets in the MultiExp or BatchScalarMul).
// scalarsMont indicates whether the provided scalars are in montgomery form.
// Returns smallValues, which represents the number of scalars which meet the following condition:
// 0 < scalar < 2^c (in other words, scalars where only the c-least significant bits are non zero).
func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks int) ([]fr.Element, int) {
	toReturn := make([]fr.Element, len(scalars))

	// number of c-bit radixes in a scalar
	nbChunks := fr.Limbs * 64 / c
	if (fr.Limbs*64)%c != 0 {
		nbChunks++
	}

	mask := uint64((1 << c) - 1)      // low c bits are 1
	msbWindow := uint64(1 << (c - 1)) // msb of the c-bit window
	max := int(1 << (c - 1))          // max value we want for our digits
	cDivides64 := (64 % c) == 0       // if c doesn't divide 64, we may need to select over multiple words

	// compute offset and word selector / shift to select the right bits of our windows
	selectors := make([]selector, nbChunks)
	for chunk := uint64(0); chunk < nbChunks; chunk++ {
		jc := uint64(chunk * c) // absolute bit offset of this window in the scalar
		d := selector{}
		d.index = jc / 64
		d.shift = jc - (d.index * 64)
		d.mask = mask << d.shift
		// a window straddles two words when it starts close enough to a word boundary
		d.multiWordSelect = !cDivides64 && d.shift > (64-c) && d.index < (fr.Limbs-1)
		if d.multiWordSelect {
			nbBitsHigh := d.shift - uint64(64-c) // number of bits taken from the next word
			d.maskHigh = (1 << nbBitsHigh) - 1
			d.shiftHigh = (c - nbBitsHigh)
		}
		selectors[chunk] = d
	}

	// for each chunk, we could track the number of non-zeros points we will need to process
	// this way, if a chunk has more work to do than others, we can spawn off more go routines
	// (at the cost of more buckets allocated)
	// a simplified approach is to track the small values where only the first word is set
	// if this number represent a significant number of points, then we will split first chunk
	// processing in the msm in 2, to ensure all go routines finish at ~same time
	// /!\ nbTasks is enough as parallel.Execute is not going to spawn more than nbTasks go routine
	// if it does, though, this will deadlock.
	chSmallValues := make(chan int, nbTasks)

	parallel.Execute(len(scalars), func(start, end int) {
		smallValues := 0
		for i := start; i < end; i++ {
			// carry propagates between consecutive windows of the SAME scalar
			var carry int

			scalar := scalars[i]
			if scalarsMont {
				scalar.FromMont()
			}
			if scalar.IsUint64() {
				// everything is 0, no need to process this scalar
				if scalar[0] == 0 {
					continue
				}
				// low c-bits are 1 in mask
				if scalar[0]&mask == scalar[0] {
					smallValues++
				}
			}

			// for each chunk in the scalar, compute the current digit, and an eventual carry
			for chunk := uint64(0); chunk < nbChunks; chunk++ {
				s := selectors[chunk]

				// init with carry if any
				digit := carry
				carry = 0

				// digit = value of the c-bit window
				digit += int((scalar[s.index] & s.mask) >> s.shift)

				if s.multiWordSelect {
					// we are selecting bits over 2 words
					digit += int(scalar[s.index+1]&s.maskHigh) << s.shiftHigh
				}

				// if digit is zero, no impact on result
				if digit == 0 {
					continue
				}

				// if the digit is larger than 2^{c-1}, then we borrow 2^c from the next window
				// and subtract 2^{c} from the current digit, making it negative.
				if digit >= max {
					digit -= (1 << c)
					carry = 1
				}

				// encode the signed digit: negative digits store |digit|-1 with the
				// msbWindow flag set, so the bucket index and the sign are recoverable
				var bits uint64
				if digit >= 0 {
					bits = uint64(digit)
				} else {
					bits = uint64(-digit-1) | msbWindow
				}

				toReturn[i][s.index] |= (bits << s.shift)
				if s.multiWordSelect {
					toReturn[i][s.index+1] |= (bits >> s.shiftHigh)
				}

			}
		}

		chSmallValues <- smallValues

	}, nbTasks)

	// aggregate small values
	close(chSmallValues)
	smallValues := 0
	for o := range chSmallValues {
		smallValues += o
	}
	return toReturn, smallValues
}
|
|
|
|
// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf
|
|
func MultiExpAffine(points []PointAffine, scalars []fr.Element, config MultiExpConfig) (PointAffine, error) {
|
|
var _p PointProj
|
|
if _, err := MultiExp(&_p, points, scalars, config); err != nil {
|
|
return PointAffine{}, err
|
|
}
|
|
|
|
var p PointAffine
|
|
p.FromProj(&_p)
|
|
return p, nil
|
|
}
|
|
|
|
// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf
// Note: We rely on this algorithm not using Equal functionality, since it is called by a banderwagon element.
func MultiExp(p *PointProj, points []PointAffine, scalars []fr.Element, config MultiExpConfig) (*PointProj, error) {
	// note:
	// each of the msmCX method is the same, except for the c constant it declares
	// duplicating (through template generation) these methods allows to declare the buckets on the stack
	// the choice of c needs to be improved:
	// there is a theoretical value that gives optimal asymptotics
	// but in practice, other factors come into play, including:
	// * if c doesn't divide 64, the word size, then we're bound to select bits over 2 words of our scalars, instead of 1
	// * number of CPUs
	// * cache friendliness (which depends on the host, G1 or G2... )
	//	--> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point don't.

	// for each msmCX
	// step 1
	// we compute, for each scalars over c-bit wide windows, nbChunk digits
	// if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and subtract
	// 2^{c} to the current digit, making it negative.
	// negative digits will be processed in the next step as adding -G into the bucket instead of G
	// (computing -G is cheap, and this saves us half of the buckets)
	// step 2
	// buckets are declared on the stack
	// notice that we have 2^{c-1} buckets instead of 2^{c} (see step1)
	// we use jacobian extended formulas here as they are faster than mixed addition
	// msmProcessChunk places points into buckets base on their selector and return the weighted bucket sum in given channel
	// step 3
	// reduce the buckets weighted sums into our result (msmReduceChunk)

	// ensure len(points) == len(scalars)
	nbPoints := len(points)
	if nbPoints != len(scalars) {
		return nil, errors.New("len(points) != len(scalars)")
	}

	// if nbTasks is not set, use all available CPUs
	if config.NbTasks <= 0 {
		config.NbTasks = runtime.NumCPU()
	}

	// here, we compute the best C for nbPoints
	// we split recursively until nbChunks(c) >= nbTasks,
	bestC := func(nbPoints int) uint64 {
		// implemented msmC methods (the c we use must be in this slice)
		implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21}
		var C uint64
		// approximate cost (in group operations)
		// cost = bits/c * (nbPoints + 2^{c})
		// this needs to be verified empirically.
		// for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results
		min := math.MaxFloat64
		for _, c := range implementedCs {
			cc := fr.Limbs * 64 * (nbPoints + (1 << (c)))
			cost := float64(cc) / float64(c)
			if cost < min {
				min = cost
				C = c
			}
		}
		// empirical, needs to be tuned.
		// if C > 16 && nbPoints < 1 << 23 {
		// 	C = 16
		// }
		return C
	}

	// grow the number of splits (halving nbPoints each time) until the total
	// number of chunks is at least config.NbTasks, so all tasks stay busy
	var C uint64
	nbSplits := 1
	nbChunks := 0
	for nbChunks < config.NbTasks {
		C = bestC(nbPoints)
		nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar
		if (fr.Limbs*64)%C != 0 {
			nbChunks++
		}
		nbChunks *= nbSplits
		if nbChunks < config.NbTasks {
			nbSplits <<= 1
			nbPoints >>= 1
		}
	}

	// partition the scalars
	// note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW)
	// if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window
	var smallValues int
	scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks)

	// if we have more than 10% of small values, we split the processing of the first chunk in 2
	// we may want to do that in msmInnerPointProj , but that would incur a cost of looping through all scalars one more time
	splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1

	// we have nbSplits intermediate results that we must sum together.
	_p := make([]PointProj, nbSplits-1)
	chDone := make(chan int, nbSplits-1)
	for i := 0; i < nbSplits-1; i++ {
		start := i * nbPoints
		end := start + nbPoints
		go func(start, end, i int) {
			msmInnerPointProj(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk)
			chDone <- i
		}(start, end, i)
	}

	// the last split (which absorbs any remainder points) is computed on the calling goroutine
	msmInnerPointProj(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk)
	for i := 0; i < nbSplits-1; i++ {
		done := <-chDone
		p.Add(p, &_p[done])
	}
	close(chDone)
	return p, nil
}
|
|
|
|
func msmInnerPointProj(p *PointProj, c int, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) {
|
|
|
|
switch c {
|
|
|
|
case 4:
|
|
msmC4(p, points, scalars, splitFirstChunk)
|
|
|
|
case 5:
|
|
msmC5(p, points, scalars, splitFirstChunk)
|
|
|
|
case 6:
|
|
msmC6(p, points, scalars, splitFirstChunk)
|
|
|
|
case 7:
|
|
msmC7(p, points, scalars, splitFirstChunk)
|
|
|
|
case 8:
|
|
msmC8(p, points, scalars, splitFirstChunk)
|
|
|
|
case 9:
|
|
msmC9(p, points, scalars, splitFirstChunk)
|
|
|
|
case 10:
|
|
msmC10(p, points, scalars, splitFirstChunk)
|
|
|
|
case 11:
|
|
msmC11(p, points, scalars, splitFirstChunk)
|
|
|
|
case 12:
|
|
msmC12(p, points, scalars, splitFirstChunk)
|
|
|
|
case 13:
|
|
msmC13(p, points, scalars, splitFirstChunk)
|
|
|
|
case 14:
|
|
msmC14(p, points, scalars, splitFirstChunk)
|
|
|
|
case 15:
|
|
msmC15(p, points, scalars, splitFirstChunk)
|
|
|
|
case 16:
|
|
msmC16(p, points, scalars, splitFirstChunk)
|
|
|
|
case 20:
|
|
msmC20(p, points, scalars, splitFirstChunk)
|
|
|
|
case 21:
|
|
msmC21(p, points, scalars, splitFirstChunk)
|
|
|
|
case 22:
|
|
msmC22(p, points, scalars, splitFirstChunk)
|
|
|
|
default:
|
|
panic("not implemented")
|
|
}
|
|
}
|
|
|
|
// msmReduceChunkPointAffine reduces the weighted sum of the buckets into the result of the multiExp
|
|
func msmReduceChunkPointAffine(p *PointProj, c int, chChunks []chan PointProj) *PointProj {
|
|
var _p PointProj
|
|
totalj := <-chChunks[len(chChunks)-1]
|
|
_p.Set(&totalj)
|
|
for j := len(chChunks) - 2; j >= 0; j-- {
|
|
for l := 0; l < c; l++ {
|
|
_p.Double(&_p)
|
|
}
|
|
totalj := <-chChunks[j]
|
|
_p.Add(&_p, &totalj)
|
|
}
|
|
|
|
p.Set(&_p)
|
|
|
|
return p
|
|
}
|
|
|
|
func msmReduceChunkPointAffineDMA(p *PointProj, c int, chChunks []PointProj) *PointProj {
|
|
var _p PointProj
|
|
totalj := chChunks[len(chChunks)-1]
|
|
_p.Set(&totalj)
|
|
for j := len(chChunks) - 2; j >= 0; j-- {
|
|
for l := 0; l < c; l++ {
|
|
_p.Double(&_p)
|
|
}
|
|
totalj := chChunks[j]
|
|
_p.Add(&_p, &totalj)
|
|
}
|
|
|
|
p.Set(&_p)
|
|
|
|
return p
|
|
}
|
|
|
|
func msmProcessChunkPointAffine(chunk uint64,
|
|
chRes chan<- PointProj,
|
|
buckets []PointProj,
|
|
c uint64,
|
|
points []PointAffine,
|
|
scalars []fr.Element) {
|
|
var total PointProj
|
|
msmProcessChunkPointAffineDMA(chunk, &total, buckets, c, points, scalars)
|
|
chRes <- total
|
|
}
|
|
|
|
// msmProcessChunkPointAffineDMA places each point into the bucket selected by its
// signed digit for this chunk, then folds the buckets into the weighted chunk sum
// total = bucket[0] + 2*bucket[1] + ... + n*bucket[n-1], written through res
// ("DMA": the result is stored directly instead of being sent on a channel).
// Scalars are expected to be pre-processed by partitionScalars, which encodes
// negative digits as |digit|-1 with the msb of the window set.
func msmProcessChunkPointAffineDMA(chunk uint64,
	res *PointProj,
	buckets []PointProj,
	c uint64,
	points []PointAffine,
	scalars []fr.Element) {

	mask := uint64((1 << c) - 1) // low c bits are 1
	msbWindow := uint64(1 << (c - 1))

	// reset buckets to the identity element
	for i := 0; i < len(buckets); i++ {
		buckets[i] = Identity
	}

	// build the selector for this chunk's bit window (same scheme as partitionScalars)
	jc := uint64(chunk * c)
	s := selector{}
	s.index = jc / 64
	s.shift = jc - (s.index * 64)
	s.mask = mask << s.shift
	s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1)
	if s.multiWordSelect {
		nbBitsHigh := s.shift - uint64(64-c)
		s.maskHigh = (1 << nbBitsHigh) - 1
		s.shiftHigh = (c - nbBitsHigh)
	}

	// for each scalars, get the digit corresponding to the chunk we're processing.
	for i := 0; i < len(scalars); i++ {
		bits := (scalars[i][s.index] & s.mask) >> s.shift
		if s.multiWordSelect {
			bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh
		}

		// zero digit: no contribution to this chunk
		if bits == 0 {
			continue
		}

		// if msbWindow bit is set, we need to subtract
		if bits&msbWindow == 0 {
			// add: positive digit d goes to bucket d-1
			var pProj PointProj
			pProj.FromAffine(&points[i])
			buckets[bits-1].Add(&pProj, &buckets[bits-1])
		} else {
			// sub: clear the sign flag to recover the bucket index, add -G
			var pProj PointProj
			pProj.FromAffine(&points[i])
			pProj.Neg(&pProj)
			buckets[bits & ^msbWindow].Add(&buckets[bits & ^msbWindow], &pProj)
		}
	}

	// reduce buckets into total
	// total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... + n*bucket[n-1]
	// runningSum holds bucket[k] + bucket[k+1] + ... so each bucket is weighted
	// by its multiplicity without any scalar multiplication
	runningSum, total := Identity, Identity
	for k := len(buckets) - 1; k >= 0; k-- {

		runningSum.Add(&runningSum, &buckets[k])

		total.Add(&total, &runningSum)
	}

	*res = total

}
|
|
|
|
func msmC4(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 4 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks]PointProj
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, pointProj *PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffineDMA(uint64(j), pointProj, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(int(nbChunks - 1))
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
j := j
|
|
go func() {
|
|
processChunk(j, points, scalars, &chChunks[j])
|
|
wg.Done()
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
if !splitFirstChunk {
|
|
processChunk(0, points, scalars, &chChunks[0])
|
|
} else {
|
|
chSplits := make([]PointProj, 2)
|
|
split := len(points) / 2
|
|
var wg sync.WaitGroup
|
|
wg.Add(2)
|
|
go func() {
|
|
processChunk(0, points[:split], scalars[:split], &chSplits[0])
|
|
wg.Done()
|
|
}()
|
|
go func() {
|
|
processChunk(0, points[split:], scalars[split:], &chSplits[1])
|
|
wg.Done()
|
|
}()
|
|
wg.Wait()
|
|
chSplits[0].Add(&chSplits[0], &chSplits[1])
|
|
chChunks[0] = chSplits[0]
|
|
}
|
|
|
|
return msmReduceChunkPointAffineDMA(p, c, chChunks[:])
|
|
}
|
|
|
|
// msmC5 computes the multi-exponentiation with window size c = 5. Since 5 does not
// divide 256, an extra (nbChunks-th) smaller window of lastC bits is processed with
// a reduced bucket array. One goroutine per chunk sends its weighted bucket sum on
// chChunks[j]; msmReduceChunkPointAffine consumes them from the most significant down.
func msmC5(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
	const (
		c        = 5                   // scalars partitioned into c-bit radixes
		nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
	)

	// for each chunk, spawn one go routine that'll loop through all the scalars in the
	// corresponding bit-window
	// note that buckets is an array allocated on the stack (for most sizes of c) and this is
	// critical for performance

	// each go routine sends its result in chChunks[i] channel
	var chChunks [nbChunks + 1]chan PointProj
	for i := 0; i < len(chChunks); i++ {
		chChunks[i] = make(chan PointProj, 1)
	}

	// c doesn't divide 256, last window is smaller we can allocate less buckets
	const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
	go func(j uint64, points []PointAffine, scalars []fr.Element) {
		var buckets [1 << (lastC - 1)]PointProj
		msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
	}(uint64(nbChunks), points, scalars)

	processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
		var buckets [1 << (c - 1)]PointProj
		msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
	}

	for j := int(nbChunks - 1); j > 0; j-- {
		go processChunk(j, points, scalars, chChunks[j])
	}

	if !splitFirstChunk {
		go processChunk(0, points, scalars, chChunks[0])
	} else {
		// first chunk is split in two halves processed concurrently; a merger
		// goroutine sums them and forwards the result on chChunks[0]
		chSplit := make(chan PointProj, 2)
		split := len(points) / 2
		go processChunk(0, points[:split], scalars[:split], chSplit)
		go processChunk(0, points[split:], scalars[split:], chSplit)
		go func() {
			s1 := <-chSplit
			s2 := <-chSplit
			close(chSplit)
			s1.Add(&s1, &s2)
			chChunks[0] <- s1
		}()
	}

	return msmReduceChunkPointAffine(p, c, chChunks[:])
}
|
|
|
|
// msmC6 computes the multi-exponentiation with window size c = 6. Since 6 does not
// divide 256, an extra (nbChunks-th) smaller window of lastC bits is processed with
// a reduced bucket array. One goroutine per chunk sends its weighted bucket sum on
// chChunks[j]; msmReduceChunkPointAffine consumes them from the most significant down.
func msmC6(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
	const (
		c        = 6                   // scalars partitioned into c-bit radixes
		nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
	)

	// for each chunk, spawn one go routine that'll loop through all the scalars in the
	// corresponding bit-window
	// note that buckets is an array allocated on the stack (for most sizes of c) and this is
	// critical for performance

	// each go routine sends its result in chChunks[i] channel
	var chChunks [nbChunks + 1]chan PointProj
	for i := 0; i < len(chChunks); i++ {
		chChunks[i] = make(chan PointProj, 1)
	}

	// c doesn't divide 256, last window is smaller we can allocate less buckets
	const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
	go func(j uint64, points []PointAffine, scalars []fr.Element) {
		var buckets [1 << (lastC - 1)]PointProj
		msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
	}(uint64(nbChunks), points, scalars)

	processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
		var buckets [1 << (c - 1)]PointProj
		msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
	}

	for j := int(nbChunks - 1); j > 0; j-- {
		go processChunk(j, points, scalars, chChunks[j])
	}

	if !splitFirstChunk {
		go processChunk(0, points, scalars, chChunks[0])
	} else {
		// first chunk is split in two halves processed concurrently; a merger
		// goroutine sums them and forwards the result on chChunks[0]
		chSplit := make(chan PointProj, 2)
		split := len(points) / 2
		go processChunk(0, points[:split], scalars[:split], chSplit)
		go processChunk(0, points[split:], scalars[split:], chSplit)
		go func() {
			s1 := <-chSplit
			s2 := <-chSplit
			close(chSplit)
			s1.Add(&s1, &s2)
			chChunks[0] <- s1
		}()
	}

	return msmReduceChunkPointAffine(p, c, chChunks[:])
}
|
|
|
|
// msmC7 computes the multi-exponentiation with window size c = 7. Since 7 does not
// divide 256, an extra (nbChunks-th) smaller window of lastC bits is processed with
// a reduced bucket array. One goroutine per chunk sends its weighted bucket sum on
// chChunks[j]; msmReduceChunkPointAffine consumes them from the most significant down.
func msmC7(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
	const (
		c        = 7                   // scalars partitioned into c-bit radixes
		nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
	)

	// for each chunk, spawn one go routine that'll loop through all the scalars in the
	// corresponding bit-window
	// note that buckets is an array allocated on the stack (for most sizes of c) and this is
	// critical for performance

	// each go routine sends its result in chChunks[i] channel
	var chChunks [nbChunks + 1]chan PointProj
	for i := 0; i < len(chChunks); i++ {
		chChunks[i] = make(chan PointProj, 1)
	}

	// c doesn't divide 256, last window is smaller we can allocate less buckets
	const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
	go func(j uint64, points []PointAffine, scalars []fr.Element) {
		var buckets [1 << (lastC - 1)]PointProj
		msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
	}(uint64(nbChunks), points, scalars)

	processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
		var buckets [1 << (c - 1)]PointProj
		msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
	}

	for j := int(nbChunks - 1); j > 0; j-- {
		go processChunk(j, points, scalars, chChunks[j])
	}

	if !splitFirstChunk {
		go processChunk(0, points, scalars, chChunks[0])
	} else {
		// first chunk is split in two halves processed concurrently; a merger
		// goroutine sums them and forwards the result on chChunks[0]
		chSplit := make(chan PointProj, 2)
		split := len(points) / 2
		go processChunk(0, points[:split], scalars[:split], chSplit)
		go processChunk(0, points[split:], scalars[split:], chSplit)
		go func() {
			s1 := <-chSplit
			s2 := <-chSplit
			close(chSplit)
			s1.Add(&s1, &s2)
			chChunks[0] <- s1
		}()
	}

	return msmReduceChunkPointAffine(p, c, chChunks[:])
}
|
|
|
|
func msmC8(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 8 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|
|
|
|
// msmC9 computes the multi-exponentiation with window size c = 9. Since 9 does not
// divide 256, an extra (nbChunks-th) smaller window of lastC bits is processed with
// a reduced bucket array. One goroutine per chunk sends its weighted bucket sum on
// chChunks[j]; msmReduceChunkPointAffine consumes them from the most significant down.
func msmC9(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
	const (
		c        = 9                   // scalars partitioned into c-bit radixes
		nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
	)

	// for each chunk, spawn one go routine that'll loop through all the scalars in the
	// corresponding bit-window
	// note that buckets is an array allocated on the stack (for most sizes of c) and this is
	// critical for performance

	// each go routine sends its result in chChunks[i] channel
	var chChunks [nbChunks + 1]chan PointProj
	for i := 0; i < len(chChunks); i++ {
		chChunks[i] = make(chan PointProj, 1)
	}

	// c doesn't divide 256, last window is smaller we can allocate less buckets
	const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
	go func(j uint64, points []PointAffine, scalars []fr.Element) {
		var buckets [1 << (lastC - 1)]PointProj
		msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
	}(uint64(nbChunks), points, scalars)

	processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
		var buckets [1 << (c - 1)]PointProj
		msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
	}

	for j := int(nbChunks - 1); j > 0; j-- {
		go processChunk(j, points, scalars, chChunks[j])
	}

	if !splitFirstChunk {
		go processChunk(0, points, scalars, chChunks[0])
	} else {
		// first chunk is split in two halves processed concurrently; a merger
		// goroutine sums them and forwards the result on chChunks[0]
		chSplit := make(chan PointProj, 2)
		split := len(points) / 2
		go processChunk(0, points[:split], scalars[:split], chSplit)
		go processChunk(0, points[split:], scalars[split:], chSplit)
		go func() {
			s1 := <-chSplit
			s2 := <-chSplit
			close(chSplit)
			s1.Add(&s1, &s2)
			chChunks[0] <- s1
		}()
	}

	return msmReduceChunkPointAffine(p, c, chChunks[:])
}
|
|
|
|
// msmC10 computes the multi-exponentiation with window size c = 10. Since 10 does not
// divide 256, an extra (nbChunks-th) smaller window of lastC bits is processed with
// a reduced bucket array. One goroutine per chunk sends its weighted bucket sum on
// chChunks[j]; msmReduceChunkPointAffine consumes them from the most significant down.
func msmC10(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
	const (
		c        = 10                  // scalars partitioned into c-bit radixes
		nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
	)

	// for each chunk, spawn one go routine that'll loop through all the scalars in the
	// corresponding bit-window
	// note that buckets is an array allocated on the stack (for most sizes of c) and this is
	// critical for performance

	// each go routine sends its result in chChunks[i] channel
	var chChunks [nbChunks + 1]chan PointProj
	for i := 0; i < len(chChunks); i++ {
		chChunks[i] = make(chan PointProj, 1)
	}

	// c doesn't divide 256, last window is smaller we can allocate less buckets
	const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
	go func(j uint64, points []PointAffine, scalars []fr.Element) {
		var buckets [1 << (lastC - 1)]PointProj
		msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
	}(uint64(nbChunks), points, scalars)

	processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
		var buckets [1 << (c - 1)]PointProj
		msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
	}

	for j := int(nbChunks - 1); j > 0; j-- {
		go processChunk(j, points, scalars, chChunks[j])
	}

	if !splitFirstChunk {
		go processChunk(0, points, scalars, chChunks[0])
	} else {
		// first chunk is split in two halves processed concurrently; a merger
		// goroutine sums them and forwards the result on chChunks[0]
		chSplit := make(chan PointProj, 2)
		split := len(points) / 2
		go processChunk(0, points[:split], scalars[:split], chSplit)
		go processChunk(0, points[split:], scalars[split:], chSplit)
		go func() {
			s1 := <-chSplit
			s2 := <-chSplit
			close(chSplit)
			s1.Add(&s1, &s2)
			chChunks[0] <- s1
		}()
	}

	return msmReduceChunkPointAffine(p, c, chChunks[:])
}
|
|
|
|
// msmC11 computes the multi-exponentiation with window size c = 11. Since 11 does not
// divide 256, an extra (nbChunks-th) smaller window of lastC bits is processed with
// a reduced bucket array. One goroutine per chunk sends its weighted bucket sum on
// chChunks[j]; msmReduceChunkPointAffine consumes them from the most significant down.
func msmC11(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
	const (
		c        = 11                  // scalars partitioned into c-bit radixes
		nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
	)

	// for each chunk, spawn one go routine that'll loop through all the scalars in the
	// corresponding bit-window
	// note that buckets is an array allocated on the stack (for most sizes of c) and this is
	// critical for performance

	// each go routine sends its result in chChunks[i] channel
	var chChunks [nbChunks + 1]chan PointProj
	for i := 0; i < len(chChunks); i++ {
		chChunks[i] = make(chan PointProj, 1)
	}

	// c doesn't divide 256, last window is smaller we can allocate less buckets
	const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
	go func(j uint64, points []PointAffine, scalars []fr.Element) {
		var buckets [1 << (lastC - 1)]PointProj
		msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
	}(uint64(nbChunks), points, scalars)

	processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
		var buckets [1 << (c - 1)]PointProj
		msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
	}

	for j := int(nbChunks - 1); j > 0; j-- {
		go processChunk(j, points, scalars, chChunks[j])
	}

	if !splitFirstChunk {
		go processChunk(0, points, scalars, chChunks[0])
	} else {
		// first chunk is split in two halves processed concurrently; a merger
		// goroutine sums them and forwards the result on chChunks[0]
		chSplit := make(chan PointProj, 2)
		split := len(points) / 2
		go processChunk(0, points[:split], scalars[:split], chSplit)
		go processChunk(0, points[split:], scalars[split:], chSplit)
		go func() {
			s1 := <-chSplit
			s2 := <-chSplit
			close(chSplit)
			s1.Add(&s1, &s2)
			chChunks[0] <- s1
		}()
	}

	return msmReduceChunkPointAffine(p, c, chChunks[:])
}
|
|
|
|
// msmC12 computes the multi-exponentiation with window size c = 12. Since 12 does not
// divide 256, an extra (nbChunks-th) smaller window of lastC bits is processed with
// a reduced bucket array. One goroutine per chunk sends its weighted bucket sum on
// chChunks[j]; msmReduceChunkPointAffine consumes them from the most significant down.
func msmC12(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
	const (
		c        = 12                  // scalars partitioned into c-bit radixes
		nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
	)

	// for each chunk, spawn one go routine that'll loop through all the scalars in the
	// corresponding bit-window
	// note that buckets is an array allocated on the stack (for most sizes of c) and this is
	// critical for performance

	// each go routine sends its result in chChunks[i] channel
	var chChunks [nbChunks + 1]chan PointProj
	for i := 0; i < len(chChunks); i++ {
		chChunks[i] = make(chan PointProj, 1)
	}

	// c doesn't divide 256, last window is smaller we can allocate less buckets
	const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
	go func(j uint64, points []PointAffine, scalars []fr.Element) {
		var buckets [1 << (lastC - 1)]PointProj
		msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
	}(uint64(nbChunks), points, scalars)

	processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
		var buckets [1 << (c - 1)]PointProj
		msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
	}

	for j := int(nbChunks - 1); j > 0; j-- {
		go processChunk(j, points, scalars, chChunks[j])
	}

	if !splitFirstChunk {
		go processChunk(0, points, scalars, chChunks[0])
	} else {
		// first chunk is split in two halves processed concurrently; a merger
		// goroutine sums them and forwards the result on chChunks[0]
		chSplit := make(chan PointProj, 2)
		split := len(points) / 2
		go processChunk(0, points[:split], scalars[:split], chSplit)
		go processChunk(0, points[split:], scalars[split:], chSplit)
		go func() {
			s1 := <-chSplit
			s2 := <-chSplit
			close(chSplit)
			s1.Add(&s1, &s2)
			chChunks[0] <- s1
		}()
	}

	return msmReduceChunkPointAffine(p, c, chChunks[:])
}
|
|
|
|
func msmC13(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 13 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks + 1]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
// c doesn't divide 256, last window is smaller we can allocate less buckets
|
|
const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
|
|
go func(j uint64, points []PointAffine, scalars []fr.Element) {
|
|
var buckets [1 << (lastC - 1)]PointProj
|
|
msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
|
|
}(uint64(nbChunks), points, scalars)
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|
|
|
|
func msmC14(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 14 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks + 1]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
// c doesn't divide 256, last window is smaller we can allocate less buckets
|
|
const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
|
|
go func(j uint64, points []PointAffine, scalars []fr.Element) {
|
|
var buckets [1 << (lastC - 1)]PointProj
|
|
msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
|
|
}(uint64(nbChunks), points, scalars)
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|
|
|
|
func msmC15(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 15 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks + 1]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
// c doesn't divide 256, last window is smaller we can allocate less buckets
|
|
const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
|
|
go func(j uint64, points []PointAffine, scalars []fr.Element) {
|
|
var buckets [1 << (lastC - 1)]PointProj
|
|
msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
|
|
}(uint64(nbChunks), points, scalars)
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|
|
|
|
func msmC16(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 16 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|
|
|
|
func msmC20(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 20 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks + 1]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
// c doesn't divide 256, last window is smaller we can allocate less buckets
|
|
const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
|
|
go func(j uint64, points []PointAffine, scalars []fr.Element) {
|
|
var buckets [1 << (lastC - 1)]PointProj
|
|
msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
|
|
}(uint64(nbChunks), points, scalars)
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|
|
|
|
func msmC21(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 21 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks + 1]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
// c doesn't divide 256, last window is smaller we can allocate less buckets
|
|
const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
|
|
go func(j uint64, points []PointAffine, scalars []fr.Element) {
|
|
var buckets [1 << (lastC - 1)]PointProj
|
|
msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
|
|
}(uint64(nbChunks), points, scalars)
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|
|
|
|
func msmC22(p *PointProj, points []PointAffine, scalars []fr.Element, splitFirstChunk bool) *PointProj {
|
|
const (
|
|
c = 22 // scalars partitioned into c-bit radixes
|
|
nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar
|
|
)
|
|
|
|
// for each chunk, spawn one go routine that'll loop through all the scalars in the
|
|
// corresponding bit-window
|
|
// note that buckets is an array allocated on the stack (for most sizes of c) and this is
|
|
// critical for performance
|
|
|
|
// each go routine sends its result in chChunks[i] channel
|
|
var chChunks [nbChunks + 1]chan PointProj
|
|
for i := 0; i < len(chChunks); i++ {
|
|
chChunks[i] = make(chan PointProj, 1)
|
|
}
|
|
|
|
// c doesn't divide 256, last window is smaller we can allocate less buckets
|
|
const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c))
|
|
go func(j uint64, points []PointAffine, scalars []fr.Element) {
|
|
var buckets [1 << (lastC - 1)]PointProj
|
|
msmProcessChunkPointAffine(j, chChunks[j], buckets[:], c, points, scalars)
|
|
}(uint64(nbChunks), points, scalars)
|
|
|
|
processChunk := func(j int, points []PointAffine, scalars []fr.Element, chChunk chan PointProj) {
|
|
var buckets [1 << (c - 1)]PointProj
|
|
msmProcessChunkPointAffine(uint64(j), chChunk, buckets[:], c, points, scalars)
|
|
}
|
|
|
|
for j := int(nbChunks - 1); j > 0; j-- {
|
|
go processChunk(j, points, scalars, chChunks[j])
|
|
}
|
|
|
|
if !splitFirstChunk {
|
|
go processChunk(0, points, scalars, chChunks[0])
|
|
} else {
|
|
chSplit := make(chan PointProj, 2)
|
|
split := len(points) / 2
|
|
go processChunk(0, points[:split], scalars[:split], chSplit)
|
|
go processChunk(0, points[split:], scalars[split:], chSplit)
|
|
go func() {
|
|
s1 := <-chSplit
|
|
s2 := <-chSplit
|
|
close(chSplit)
|
|
s1.Add(&s1, &s2)
|
|
chChunks[0] <- s1
|
|
}()
|
|
}
|
|
|
|
return msmReduceChunkPointAffine(p, c, chChunks[:])
|
|
}
|