// ecmult_gen_amd64.go

   1  //go:build amd64 && !purego
   2  
   3  package p256k1
   4  
   5  // =============================================================================
   6  // AMD64-optimized Generator Precomputation using Group4x64
   7  // =============================================================================
   8  //
   9  // This file contains optimized generator multiplication using the faster
  10  // Group4x64 field representation with BMI2 MULX instructions.
  11  
// Precomputed tables in 4x64 format for faster operations.
var (
	// preGenG4x64 contains odd multiples of G converted to the 4x64
	// field representation; filled lazily by initGen4x64Tables.
	preGenG4x64 [genTableSize]GroupElement4x64Affine

	// preGenLambdaG4x64 contains odd multiples of λ*G in 4x64 format;
	// filled lazily by initGen4x64Tables.
	preGenLambdaG4x64 [genTableSize]GroupElement4x64Affine

	// gen4x64TablesInitialized tracks whether the 4x64 tables have been
	// computed. NOTE(review): a plain bool guard — concurrent first calls
	// to initGen4x64Tables would race; confirm callers serialize the
	// first use or migrate this to sync.Once.
	gen4x64TablesInitialized bool
)
  23  
  24  // initGen4x64Tables converts the precomputed tables to 4x64 format
  25  func initGen4x64Tables() {
  26  	if gen4x64TablesInitialized {
  27  		return
  28  	}
  29  
  30  	// Ensure base tables are initialized first
  31  	initGenTables()
  32  
  33  	// Convert preGenG to 4x64 format
  34  	for i := 0; i < genTableSize; i++ {
  35  		preGenG4x64[i].FromGroupElementAffine(&preGenG[i])
  36  	}
  37  
  38  	// Convert preGenLambdaG to 4x64 format
  39  	for i := 0; i < genTableSize; i++ {
  40  		preGenLambdaG4x64[i].FromGroupElementAffine(&preGenLambdaG[i])
  41  	}
  42  
  43  	gen4x64TablesInitialized = true
  44  }
  45  
  46  // ecmultGenGLV4x64 computes r = k * G using 4x64 optimized operations
  47  // This is significantly faster than the generic version on AMD64
  48  func ecmultGenGLV4x64(r *GroupElementJacobian, k *Scalar) {
  49  	if k.isZero() {
  50  		r.setInfinity()
  51  		return
  52  	}
  53  
  54  	// Ensure tables are initialized
  55  	initGen4x64Tables()
  56  
  57  	// Split scalar using GLV: k = k1 + k2*λ
  58  	var k1, k2 Scalar
  59  	scalarSplitLambda(&k1, &k2, k)
  60  
  61  	// Normalize k1 and k2 to be "low" (not high)
  62  	neg1 := k1.isHigh()
  63  	if neg1 {
  64  		k1.negate(&k1)
  65  	}
  66  
  67  	neg2 := k2.isHigh()
  68  	if neg2 {
  69  		k2.negate(&k2)
  70  	}
  71  
  72  	// Convert to wNAF
  73  	var wnaf1, wnaf2 [257]int8
  74  
  75  	bits1 := k1.wNAF(&wnaf1, genWindowSize)
  76  	bits2 := k2.wNAF(&wnaf2, genWindowSize)
  77  
  78  	// Find maximum bit position
  79  	maxBits := bits1
  80  	if bits2 > maxBits {
  81  		maxBits = bits2
  82  	}
  83  
  84  	// Perform Strauss algorithm using 4x64 operations
  85  	var r4x64 GroupElement4x64Jacobian
  86  	r4x64.setInfinity()
  87  
  88  	for i := maxBits - 1; i >= 0; i-- {
  89  		// Double the result
  90  		if !r4x64.isInfinity() {
  91  			r4x64.double(&r4x64)
  92  		}
  93  
  94  		// Add contribution from k1 (using preGenG4x64 table)
  95  		if i < bits1 && wnaf1[i] != 0 {
  96  			var pt GroupElement4x64Affine
  97  			n := int(wnaf1[i])
  98  
  99  			var idx int
 100  			if n > 0 {
 101  				idx = (n - 1) / 2
 102  			} else {
 103  				idx = (-n - 1) / 2
 104  			}
 105  
 106  			if idx < genTableSize {
 107  				pt = preGenG4x64[idx]
 108  				// Negate if wNAF digit is negative
 109  				if n < 0 {
 110  					pt.negate(&pt)
 111  				}
 112  				// Negate if k1 was negated during normalization
 113  				if neg1 {
 114  					pt.negate(&pt)
 115  				}
 116  
 117  				if r4x64.isInfinity() {
 118  					r4x64.setGE(&pt)
 119  				} else {
 120  					r4x64.addGE(&r4x64, &pt)
 121  				}
 122  			}
 123  		}
 124  
 125  		// Add contribution from k2 (using preGenLambdaG4x64 table)
 126  		if i < bits2 && wnaf2[i] != 0 {
 127  			var pt GroupElement4x64Affine
 128  			n := int(wnaf2[i])
 129  
 130  			var idx int
 131  			if n > 0 {
 132  				idx = (n - 1) / 2
 133  			} else {
 134  				idx = (-n - 1) / 2
 135  			}
 136  
 137  			if idx < genTableSize {
 138  				pt = preGenLambdaG4x64[idx]
 139  				// Negate if wNAF digit is negative
 140  				if n < 0 {
 141  					pt.negate(&pt)
 142  				}
 143  				// Negate if k2 was negated during normalization
 144  				if neg2 {
 145  					pt.negate(&pt)
 146  				}
 147  
 148  				if r4x64.isInfinity() {
 149  					r4x64.setGE(&pt)
 150  				} else {
 151  					r4x64.addGE(&r4x64, &pt)
 152  				}
 153  			}
 154  		}
 155  	}
 156  
 157  	// Convert result back to standard format
 158  	r4x64.ToGroupElementJacobian(r)
 159  }
 160  
// EcmultGen computes r = k * G, the scalar multiple of the curve
// generator, using the fastest available method. On this build
// (amd64 && !purego) it delegates directly to the 4x64 GLV
// implementation; k == 0 produces the point at infinity.
func EcmultGen(r *GroupElementJacobian, k *Scalar) {
	ecmultGenGLV4x64(r, k)
}
 166  
// EcmultGenGLV is the public interface for fast generator
// multiplication, r = k * G. It is an alias for the same 4x64 GLV
// implementation used by EcmultGen on this build.
func EcmultGenGLV(r *GroupElementJacobian, k *Scalar) {
	ecmultGenGLV4x64(r, k)
}
 172