p256.go raw

   1  /*
   2  Copyright Suzhou Tongji Fintech Research Institute 2017 All Rights Reserved.
   3  Licensed under the Apache License, Version 2.0 (the "License");
   4  you may not use this file except in compliance with the License.
   5  You may obtain a copy of the License at
   6  
   7  	http://www.apache.org/licenses/LICENSE-2.0
   8  
   9  Unless required by applicable law or agreed to in writing, software
  10  distributed under the License is distributed on an "AS IS" BASIS,
  11  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  See the License for the specific language governing permissions and
  13  limitations under the License.
  14  */
  15  
  16  package sm2
  17  
  18  import (
  19  	"crypto/elliptic"
  20  	"math/big"
  21  	"sync"
  22  )
  23  
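/** A fast SM2 implementation that borrows the optimization techniques used by the standard library's P-256 code.
 * The standard library's P-256 implementation is rather hard to follow, and so is this SM2 one; interested readers are welcome to study it. Finally, a mythical beast stands guard...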
  26   *
  27   * ━━━━━━animal━━━━━━
  28   *    ┏┓   ┏┓
  29   *   ┏┛┻━━━┛┻┓
  30   *   ┃       ┃
  31   *   ┃   ━   ┃
  32   *   ┃ ┳┛ ┗┳ ┃
  33   *   ┃       ┃
  34   *   ┃   ┻   ┃
  35   *   ┃       ┃
  36   *   ┗━┓   ┏━┛
  37   *    ┃   ┃
  38   *    ┃   ┃
  39   *    ┃   ┗━━━┓
  40   *	    ┃     ┣┓
  41   *     ┃     ┏┛
  42   *    ┗┓┓┏━┳┓┏┛
  43   *    ┃┫┫ ┃┫┫
  44   *    ┗┻┛ ┗┻┛
  45   *
  46   * ━━━━━Kawaii ━━━━━━
  47   */
  48  
  49  type sm2P256Curve struct {
  50  	RInverse *big.Int
  51  	*elliptic.CurveParams
  52  	a, b, gx, gy sm2P256FieldElement
  53  }
  54  
  55  var initonce sync.Once
  56  var sm2P256 sm2P256Curve
  57  
  58  type sm2P256FieldElement [9]uint32
  59  type sm2P256LargeFieldElement [17]uint64
  60  
  61  const (
  62  	bottom28Bits = 0xFFFFFFF
  63  	bottom29Bits = 0x1FFFFFFF
  64  )
  65  
  66  func initP256Sm2() {
  67  	sm2P256.CurveParams = &elliptic.CurveParams{Name: "SM2-P-256"} // sm2
  68  	A, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFC", 16)
  69  	//SM2椭	椭 圆 曲 线 公 钥 密 码 算 法 推 荐 曲 线 参 数
  70  	sm2P256.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
  71  	sm2P256.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
  72  	sm2P256.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16)
  73  	sm2P256.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
  74  	sm2P256.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16)
  75  	sm2P256.RInverse, _ = new(big.Int).SetString("7ffffffd80000002fffffffe000000017ffffffe800000037ffffffc80000002", 16)
  76  	sm2P256.BitSize = 256
  77  	sm2P256FromBig(&sm2P256.a, A)
  78  	sm2P256FromBig(&sm2P256.gx, sm2P256.Gx)
  79  	sm2P256FromBig(&sm2P256.gy, sm2P256.Gy)
  80  	sm2P256FromBig(&sm2P256.b, sm2P256.B)
  81  }
  82  
  83  func P256Sm2() elliptic.Curve {
  84  	initonce.Do(initP256Sm2)
  85  	return sm2P256
  86  }
  87  
  88  func (curve sm2P256Curve) Params() *elliptic.CurveParams {
  89  	return sm2P256.CurveParams
  90  }
  91  
  92  // y^2 = x^3 + ax + b
  93  func (curve sm2P256Curve) IsOnCurve(X, Y *big.Int) bool {
  94  	var a, x, y, y2, x3 sm2P256FieldElement
  95  
  96  	sm2P256FromBig(&x, X)
  97  	sm2P256FromBig(&y, Y)
  98  
  99  	sm2P256Square(&x3, &x)       // x3 = x ^ 2
 100  	sm2P256Mul(&x3, &x3, &x)     // x3 = x ^ 2 * x
 101  	sm2P256Mul(&a, &curve.a, &x) // a = a * x
 102  	sm2P256Add(&x3, &x3, &a)
 103  	sm2P256Add(&x3, &x3, &curve.b)
 104  
 105  	sm2P256Square(&y2, &y) // y2 = y ^ 2
 106  	return sm2P256ToBig(&x3).Cmp(sm2P256ToBig(&y2)) == 0
 107  }
 108  
 109  func zForAffine(x, y *big.Int) *big.Int {
 110  	z := new(big.Int)
 111  	if x.Sign() != 0 || y.Sign() != 0 {
 112  		z.SetInt64(1)
 113  	}
 114  	return z
 115  }
 116  
 117  func (curve sm2P256Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
 118  	var X1, Y1, Z1, X2, Y2, Z2, X3, Y3, Z3 sm2P256FieldElement
 119  
 120  	z1 := zForAffine(x1, y1)
 121  	z2 := zForAffine(x2, y2)
 122  	sm2P256FromBig(&X1, x1)
 123  	sm2P256FromBig(&Y1, y1)
 124  	sm2P256FromBig(&Z1, z1)
 125  	sm2P256FromBig(&X2, x2)
 126  	sm2P256FromBig(&Y2, y2)
 127  	sm2P256FromBig(&Z2, z2)
 128  	sm2P256PointAdd(&X1, &Y1, &Z1, &X2, &Y2, &Z2, &X3, &Y3, &Z3)
 129  	return sm2P256ToAffine(&X3, &Y3, &Z3)
 130  }
 131  
 132  func (curve sm2P256Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
 133  	var X1, Y1, Z1 sm2P256FieldElement
 134  
 135  	z1 := zForAffine(x1, y1)
 136  	sm2P256FromBig(&X1, x1)
 137  	sm2P256FromBig(&Y1, y1)
 138  	sm2P256FromBig(&Z1, z1)
 139  	sm2P256PointDouble(&X1, &Y1, &Z1, &X1, &Y1, &Z1)
 140  	return sm2P256ToAffine(&X1, &Y1, &Z1)
 141  }
 142  
 143  func (curve sm2P256Curve) ScalarMult(x1, y1 *big.Int, k []byte) (*big.Int, *big.Int) {
 144  	var X, Y, Z, X1, Y1 sm2P256FieldElement
 145  	sm2P256FromBig(&X1, x1)
 146  	sm2P256FromBig(&Y1, y1)
 147  	scalar := sm2GenrateWNaf(k)
 148  	scalarReversed := WNafReversed(scalar)
 149  	sm2P256ScalarMult(&X, &Y, &Z, &X1, &Y1, scalarReversed)
 150  	return sm2P256ToAffine(&X, &Y, &Z)
 151  }
 152  
 153  func (curve sm2P256Curve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
 154  	var scalarReversed [32]byte
 155  	var X, Y, Z sm2P256FieldElement
 156  
 157  	sm2P256GetScalar(&scalarReversed, k)
 158  	sm2P256ScalarBaseMult(&X, &Y, &Z, &scalarReversed)
 159  	return sm2P256ToAffine(&X, &Y, &Z)
 160  }
 161  
// sm2P256Precomputed is the lookup table used by sm2P256ScalarBaseMult.
//
// It consists of two consecutive sub-tables (reached with table offsets 0 and
// 30*9). Each sub-table stores 15 affine points, and each point is stored as
// 9 limbs of x followed by 9 limbs of y, which is the order in which
// sm2P256SelectAffinePoint reads it. Index 0 (all scalar bits clear) has no
// entry and is represented by an all-zero output.
var sm2P256Precomputed = [9 * 2 * 15 * 2]uint32{
	0x830053d, 0x328990f, 0x6c04fe1, 0xc0f72e5, 0x1e19f3c, 0x666b093, 0x175a87b, 0xec38276, 0x222cf4b,
	0x185a1bba, 0x354e593, 0x1295fac1, 0xf2bc469, 0x47c60fa, 0xc19b8a9, 0xf63533e, 0x903ae6b, 0xc79acba,
	0x15b061a4, 0x33e020b, 0xdffb34b, 0xfcf2c8, 0x16582e08, 0x262f203, 0xfb34381, 0xa55452, 0x604f0ff,
	0x41f1f90, 0xd64ced2, 0xee377bf, 0x75f05f0, 0x189467ae, 0xe2244e, 0x1e7700e8, 0x3fbc464, 0x9612d2e,
	0x1341b3b8, 0xee84e23, 0x1edfa5b4, 0x14e6030, 0x19e87be9, 0x92f533c, 0x1665d96c, 0x226653e, 0xa238d3e,
	0xf5c62c, 0x95bb7a, 0x1f0e5a41, 0x28789c3, 0x1f251d23, 0x8726609, 0xe918910, 0x8096848, 0xf63d028,
	0x152296a1, 0x9f561a8, 0x14d376fb, 0x898788a, 0x61a95fb, 0xa59466d, 0x159a003d, 0x1ad1698, 0x93cca08,
	0x1b314662, 0x706e006, 0x11ce1e30, 0x97b710, 0x172fbc0d, 0x8f50158, 0x11c7ffe7, 0xd182cce, 0xc6ad9e8,
	0x12ea31b2, 0xc4e4f38, 0x175b0d96, 0xec06337, 0x75a9c12, 0xb001fdf, 0x93e82f5, 0x34607de, 0xb8035ed,
	0x17f97924, 0x75cf9e6, 0xdceaedd, 0x2529924, 0x1a10c5ff, 0xb1a54dc, 0x19464d8, 0x2d1997, 0xde6a110,
	0x1e276ee5, 0x95c510c, 0x1aca7c7a, 0xfe48aca, 0x121ad4d9, 0xe4132c6, 0x8239b9d, 0x40ea9cd, 0x816c7b,
	0x632d7a4, 0xa679813, 0x5911fcf, 0x82b0f7c, 0x57b0ad5, 0xbef65, 0xd541365, 0x7f9921f, 0xc62e7a,
	0x3f4b32d, 0x58e50e1, 0x6427aed, 0xdcdda67, 0xe8c2d3e, 0x6aa54a4, 0x18df4c35, 0x49a6a8e, 0x3cd3d0c,
	0xd7adf2, 0xcbca97, 0x1bda5f2d, 0x3258579, 0x606b1e6, 0x6fc1b5b, 0x1ac27317, 0x503ca16, 0xa677435,
	0x57bc73, 0x3992a42, 0xbab987b, 0xfab25eb, 0x128912a4, 0x90a1dc4, 0x1402d591, 0x9ffbcfc, 0xaa48856,
	0x7a7c2dc, 0xcefd08a, 0x1b29bda6, 0xa785641, 0x16462d8c, 0x76241b7, 0x79b6c3b, 0x204ae18, 0xf41212b,
	0x1f567a4d, 0xd6ce6db, 0xedf1784, 0x111df34, 0x85d7955, 0x55fc189, 0x1b7ae265, 0xf9281ac, 0xded7740,
	0xf19468b, 0x83763bb, 0x8ff7234, 0x3da7df8, 0x9590ac3, 0xdc96f2a, 0x16e44896, 0x7931009, 0x99d5acc,
	0x10f7b842, 0xaef5e84, 0xc0310d7, 0xdebac2c, 0x2a7b137, 0x4342344, 0x19633649, 0x3a10624, 0x4b4cb56,
	0x1d809c59, 0xac007f, 0x1f0f4bcd, 0xa1ab06e, 0xc5042cf, 0x82c0c77, 0x76c7563, 0x22c30f3, 0x3bf1568,
	0x7a895be, 0xfcca554, 0x12e90e4c, 0x7b4ab5f, 0x13aeb76b, 0x5887e2c, 0x1d7fe1e3, 0x908c8e3, 0x95800ee,
	0xb36bd54, 0xf08905d, 0x4e73ae8, 0xf5a7e48, 0xa67cb0, 0x50e1067, 0x1b944a0a, 0xf29c83a, 0xb23cfb9,
	0xbe1db1, 0x54de6e8, 0xd4707f2, 0x8ebcc2d, 0x2c77056, 0x1568ce4, 0x15fcc849, 0x4069712, 0xe2ed85f,
	0x2c5ff09, 0x42a6929, 0x628e7ea, 0xbd5b355, 0xaf0bd79, 0xaa03699, 0xdb99816, 0x4379cef, 0x81d57b,
	0x11237f01, 0xe2a820b, 0xfd53b95, 0x6beb5ee, 0x1aeb790c, 0xe470d53, 0x2c2cfee, 0x1c1d8d8, 0xa520fc4,
	0x1518e034, 0xa584dd4, 0x29e572b, 0xd4594fc, 0x141a8f6f, 0x8dfccf3, 0x5d20ba3, 0x2eb60c3, 0x9f16eb0,
	0x11cec356, 0xf039f84, 0x1b0990c1, 0xc91e526, 0x10b65bae, 0xf0616e8, 0x173fa3ff, 0xec8ccf9, 0xbe32790,
	0x11da3e79, 0xe2f35c7, 0x908875c, 0xdacf7bd, 0x538c165, 0x8d1487f, 0x7c31aed, 0x21af228, 0x7e1689d,
	0xdfc23ca, 0x24f15dc, 0x25ef3c4, 0x35248cd, 0x99a0f43, 0xa4b6ecc, 0xd066b3, 0x2481152, 0x37a7688,
	0x15a444b6, 0xb62300c, 0x4b841b, 0xa655e79, 0xd53226d, 0xbeb348a, 0x127f3c2, 0xb989247, 0x71a277d,
	0x19e9dfcb, 0xb8f92d0, 0xe2d226c, 0x390a8b0, 0x183cc462, 0x7bd8167, 0x1f32a552, 0x5e02db4, 0xa146ee9,
	0x1a003957, 0x1c95f61, 0x1eeec155, 0x26f811f, 0xf9596ba, 0x3082bfb, 0x96df083, 0x3e3a289, 0x7e2d8be,
	0x157a63e0, 0x99b8941, 0x1da7d345, 0xcc6cd0, 0x10beed9a, 0x48e83c0, 0x13aa2e25, 0x7cad710, 0x4029988,
	0x13dfa9dd, 0xb94f884, 0x1f4adfef, 0xb88543, 0x16f5f8dc, 0xa6a67f4, 0x14e274e2, 0x5e56cf4, 0x2f24ef,
	0x1e9ef967, 0xfe09bad, 0xfe079b3, 0xcc0ae9e, 0xb3edf6d, 0x3e961bc, 0x130d7831, 0x31043d6, 0xba986f9,
	0x1d28055, 0x65240ca, 0x4971fa3, 0x81b17f8, 0x11ec34a5, 0x8366ddc, 0x1471809, 0xfa5f1c6, 0xc911e15,
	0x8849491, 0xcf4c2e2, 0x14471b91, 0x39f75be, 0x445c21e, 0xf1585e9, 0x72cc11f, 0x4c79f0c, 0xe5522e1,
	0x1874c1ee, 0x4444211, 0x7914884, 0x3d1b133, 0x25ba3c, 0x4194f65, 0x1c0457ef, 0xac4899d, 0xe1fa66c,
	0x130a7918, 0x9b8d312, 0x4b1c5c8, 0x61ccac3, 0x18c8aa6f, 0xe93cb0a, 0xdccb12c, 0xde10825, 0x969737d,
	0xf58c0c3, 0x7cee6a9, 0xc2c329a, 0xc7f9ed9, 0x107b3981, 0x696a40e, 0x152847ff, 0x4d88754, 0xb141f47,
	0x5a16ffe, 0x3a7870a, 0x18667659, 0x3b72b03, 0xb1c9435, 0x9285394, 0xa00005a, 0x37506c, 0x2edc0bb,
	0x19afe392, 0xeb39cac, 0x177ef286, 0xdf87197, 0x19f844ed, 0x31fe8, 0x15f9bfd, 0x80dbec, 0x342e96e,
	0x497aced, 0xe88e909, 0x1f5fa9ba, 0x530a6ee, 0x1ef4e3f1, 0x69ffd12, 0x583006d, 0x2ecc9b1, 0x362db70,
	0x18c7bdc5, 0xf4bb3c5, 0x1c90b957, 0xf067c09, 0x9768f2b, 0xf73566a, 0x1939a900, 0x198c38a, 0x202a2a1,
	0x4bbf5a6, 0x4e265bc, 0x1f44b6e7, 0x185ca49, 0xa39e81b, 0x24aff5b, 0x4acc9c2, 0x638bdd3, 0xb65b2a8,
	0x6def8be, 0xb94537a, 0x10b81dee, 0xe00ec55, 0x2f2cdf7, 0xc20622d, 0x2d20f36, 0xe03c8c9, 0x898ea76,
	0x8e3921b, 0x8905bff, 0x1e94b6c8, 0xee7ad86, 0x154797f2, 0xa620863, 0x3fbd0d9, 0x1f3caab, 0x30c24bd,
	0x19d3892f, 0x59c17a2, 0x1ab4b0ae, 0xf8714ee, 0x90c4098, 0xa9c800d, 0x1910236b, 0xea808d3, 0x9ae2f31,
	0x1a15ad64, 0xa48c8d1, 0x184635a4, 0xb725ef1, 0x11921dcc, 0x3f866df, 0x16c27568, 0xbdf580a, 0xb08f55c,
	0x186ee1c, 0xb1627fa, 0x34e82f6, 0x933837e, 0xf311be5, 0xfedb03b, 0x167f72cd, 0xa5469c0, 0x9c82531,
	0xb92a24b, 0x14fdc8b, 0x141980d1, 0xbdc3a49, 0x7e02bb1, 0xaf4e6dd, 0x106d99e1, 0xd4616fc, 0x93c2717,
	0x1c0a0507, 0xc6d5fed, 0x9a03d8b, 0xa1d22b0, 0x127853e3, 0xc4ac6b8, 0x1a048cf7, 0x9afb72c, 0x65d485d,
	0x72d5998, 0xe9fa744, 0xe49e82c, 0x253cf80, 0x5f777ce, 0xa3799a5, 0x17270cbb, 0xc1d1ef0, 0xdf74977,
	0x114cb859, 0xfa8e037, 0xb8f3fe5, 0xc734cc6, 0x70d3d61, 0xeadac62, 0x12093dd0, 0x9add67d, 0x87200d6,
	0x175bcbb, 0xb29b49f, 0x1806b79c, 0x12fb61f, 0x170b3a10, 0x3aaf1cf, 0xa224085, 0x79d26af, 0x97759e2,
	0x92e19f1, 0xb32714d, 0x1f00d9f1, 0xc728619, 0x9e6f627, 0xe745e24, 0x18ea4ace, 0xfc60a41, 0x125f5b2,
	0xc3cf512, 0x39ed486, 0xf4d15fa, 0xf9167fd, 0x1c1f5dd5, 0xc21a53e, 0x1897930, 0x957a112, 0x21059a0,
	0x1f9e3ddc, 0xa4dfced, 0x8427f6f, 0x726fbe7, 0x1ea658f8, 0x2fdcd4c, 0x17e9b66f, 0xb2e7c2e, 0x39923bf,
	0x1bae104, 0x3973ce5, 0xc6f264c, 0x3511b84, 0x124195d7, 0x11996bd, 0x20be23d, 0xdc437c4, 0x4b4f16b,
	0x11902a0, 0x6c29cc9, 0x1d5ffbe6, 0xdb0b4c7, 0x10144c14, 0x2f2b719, 0x301189, 0x2343336, 0xa0bf2ac,
}
 224  
 225  func sm2P256GetScalar(b *[32]byte, a []byte) {
 226  	var scalarBytes []byte
 227  
 228  	n := new(big.Int).SetBytes(a)
 229  	if n.Cmp(sm2P256.N) >= 0 {
 230  		n.Mod(n, sm2P256.N)
 231  		scalarBytes = n.Bytes()
 232  	} else {
 233  		scalarBytes = a
 234  	}
 235  	for i, v := range scalarBytes {
 236  		b[len(scalarBytes)-(1+i)] = v
 237  	}
 238  }
 239  
// sm2P256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}, i.e. it
// adds a Jacobian point and an affine point.
//
// The formulas do not special-case either operand being the point at
// infinity or both operands being the same point; sm2P256ScalarBaseMult
// compensates for the infinity cases with conditional copies.
func sm2P256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *sm2P256FieldElement) {
	var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp sm2P256FieldElement

	sm2P256Square(&z1z1, z1) // z1z1 = z1 ^ 2
	sm2P256Add(&tmp, z1, z1) // tmp = 2 * z1

	sm2P256Mul(&u2, x2, &z1z1)     // u2 = x2 * z1 ^ 2
	sm2P256Mul(&z1z1z1, z1, &z1z1) // z1z1z1 = z1 ^ 3
	sm2P256Mul(&s2, y2, &z1z1z1)   // s2 = y2 * z1 ^ 3
	sm2P256Sub(&h, &u2, x1)        // h = u2 - x1
	sm2P256Add(&i, &h, &h)         // i = 2 * h
	sm2P256Square(&i, &i)          // i = 4 * h ^ 2
	sm2P256Mul(&j, &h, &i)         // j = h * i
	sm2P256Sub(&r, &s2, y1)        // r = s2 - y1
	sm2P256Add(&r, &r, &r)         // r = 2 * (s2 - y1)
	sm2P256Mul(&v, x1, &i)         // v = x1 * i

	sm2P256Mul(zOut, &tmp, &h) // zOut = 2 * z1 * h
	sm2P256Square(&rr, &r)     // rr = r ^ 2
	sm2P256Sub(xOut, &rr, &j)
	sm2P256Sub(xOut, xOut, &v)
	sm2P256Sub(xOut, xOut, &v) // xOut = r ^ 2 - j - 2 * v

	sm2P256Sub(&tmp, &v, xOut) // tmp = v - xOut
	sm2P256Mul(yOut, &tmp, &r) // yOut = r * (v - xOut)
	sm2P256Mul(&tmp, y1, &j)   // tmp = y1 * j
	sm2P256Sub(yOut, yOut, &tmp)
	sm2P256Sub(yOut, yOut, &tmp) // yOut = r * (v - xOut) - 2 * y1 * j
}
 269  
 270  // sm2P256CopyConditional sets out=in if mask = 0xffffffff in constant time.
 271  //
 272  // On entry: mask is either 0 or 0xffffffff.
 273  func sm2P256CopyConditional(out, in *sm2P256FieldElement, mask uint32) {
 274  	for i := 0; i < 9; i++ {
 275  		tmp := mask & (in[i] ^ out[i])
 276  		out[i] ^= tmp
 277  	}
 278  }
 279  
 280  // sm2P256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
 281  // On entry: index < 16, table[0] must be zero.
 282  func sm2P256SelectAffinePoint(xOut, yOut *sm2P256FieldElement, table []uint32, index uint32) {
 283  	for i := range xOut {
 284  		xOut[i] = 0
 285  	}
 286  	for i := range yOut {
 287  		yOut[i] = 0
 288  	}
 289  
 290  	for i := uint32(1); i < 16; i++ {
 291  		mask := i ^ index
 292  		mask |= mask >> 2
 293  		mask |= mask >> 1
 294  		mask &= 1
 295  		mask--
 296  		for j := range xOut {
 297  			xOut[j] |= table[0] & mask
 298  			table = table[1:]
 299  		}
 300  		for j := range yOut {
 301  			yOut[j] |= table[0] & mask
 302  			table = table[1:]
 303  		}
 304  	}
 305  }
 306  
 307  // sm2P256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
 308  // table.
 309  // On entry: index < 16, table[0] must be zero.
 310  func sm2P256SelectJacobianPoint(xOut, yOut, zOut *sm2P256FieldElement, table *[16][3]sm2P256FieldElement, index uint32) {
 311  	for i := range xOut {
 312  		xOut[i] = 0
 313  	}
 314  	for i := range yOut {
 315  		yOut[i] = 0
 316  	}
 317  	for i := range zOut {
 318  		zOut[i] = 0
 319  	}
 320  
 321  	// The implicit value at index 0 is all zero. We don't need to perform that
 322  	// iteration of the loop because we already set out_* to zero.
 323  	for i := uint32(1); i < 16; i++ {
 324  		mask := i ^ index
 325  		mask |= mask >> 2
 326  		mask |= mask >> 1
 327  		mask &= 1
 328  		mask--
 329  		for j := range xOut {
 330  			xOut[j] |= table[i][0][j] & mask
 331  		}
 332  		for j := range yOut {
 333  			yOut[j] |= table[i][1][j] & mask
 334  		}
 335  		for j := range zOut {
 336  			zOut[j] |= table[i][2][j] & mask
 337  		}
 338  	}
 339  }
 340  
 341  // sm2P256GetBit returns the bit'th bit of scalar.
 342  func sm2P256GetBit(scalar *[32]uint8, bit uint) uint32 {
 343  	return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1)
 344  }
 345  
// sm2P256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
// little-endian number. Note that the value of scalar must be less than the
// order of the group.
//
// The result is a Jacobian point. Table lookups and the handling of the
// point at infinity are done with masks and conditional copies rather than
// with branches on the scalar bits.
func sm2P256ScalarBaseMult(xOut, yOut, zOut *sm2P256FieldElement, scalar *[32]uint8) {
	// nIsInfinityMask is all ones while the accumulator is still the point at
	// infinity.
	nIsInfinityMask := ^uint32(0)
	var px, py, tx, ty, tz sm2P256FieldElement
	var pIsNoninfiniteMask, mask, tableOffset uint32

	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}

	// The loop adds bits at positions 0, 64, 128 and 192, followed by
	// positions 32,96,160 and 224 and does this 32 times.
	for i := uint(0); i < 32; i++ {
		if i != 0 {
			sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
		}
		tableOffset = 0
		// j = 0 uses the first sub-table, j = 32 the second one.
		for j := uint(0); j <= 32; j += 32 {
			bit0 := sm2P256GetBit(scalar, 31-i+j)
			bit1 := sm2P256GetBit(scalar, 95-i+j)
			bit2 := sm2P256GetBit(scalar, 159-i+j)
			bit3 := sm2P256GetBit(scalar, 223-i+j)
			index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)

			sm2P256SelectAffinePoint(&px, &py, sm2P256Precomputed[tableOffset:], index)
			tableOffset += 30 * 9

			// Since scalar is less than the order of the group, we know that
			// {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
			// below.
			sm2P256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
			// The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero
			// (a.k.a.  the point at infinity). We handle that situation by
			// copying the point from the table.
			sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
			sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
			sm2P256CopyConditional(zOut, &sm2P256Factor[1], nIsInfinityMask)

			// Equally, the result is also wrong if the point from the table is
			// zero, which happens when the index is zero. We handle that by
			// only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
			pIsNoninfiniteMask = nonZeroToAllOnes(index)
			mask = pIsNoninfiniteMask & ^nIsInfinityMask
			sm2P256CopyConditional(xOut, &tx, mask)
			sm2P256CopyConditional(yOut, &ty, mask)
			sm2P256CopyConditional(zOut, &tz, mask)
			// If p was not zero, then n is now non-zero.
			nIsInfinityMask &^= pIsNoninfiniteMask
		}
	}
}
 405  
 406  func sm2P256PointToAffine(xOut, yOut, x, y, z *sm2P256FieldElement) {
 407  	var zInv, zInvSq sm2P256FieldElement
 408  
 409  	zz := sm2P256ToBig(z)
 410  	zz.ModInverse(zz, sm2P256.P)
 411  	sm2P256FromBig(&zInv, zz)
 412  
 413  	sm2P256Square(&zInvSq, &zInv)
 414  	sm2P256Mul(xOut, x, &zInvSq)
 415  	sm2P256Mul(&zInv, &zInv, &zInvSq)
 416  	sm2P256Mul(yOut, y, &zInv)
 417  }
 418  
 419  func sm2P256ToAffine(x, y, z *sm2P256FieldElement) (xOut, yOut *big.Int) {
 420  	var xx, yy sm2P256FieldElement
 421  
 422  	sm2P256PointToAffine(&xx, &yy, x, y, z)
 423  	return sm2P256ToBig(&xx), sm2P256ToBig(&yy)
 424  }
 425  
 426  var sm2P256Factor = []sm2P256FieldElement{
 427  	sm2P256FieldElement{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
 428  	sm2P256FieldElement{0x2, 0x0, 0x1FFFFF00, 0x7FF, 0x0, 0x0, 0x0, 0x2000000, 0x0},
 429  	sm2P256FieldElement{0x4, 0x0, 0x1FFFFE00, 0xFFF, 0x0, 0x0, 0x0, 0x4000000, 0x0},
 430  	sm2P256FieldElement{0x6, 0x0, 0x1FFFFD00, 0x17FF, 0x0, 0x0, 0x0, 0x6000000, 0x0},
 431  	sm2P256FieldElement{0x8, 0x0, 0x1FFFFC00, 0x1FFF, 0x0, 0x0, 0x0, 0x8000000, 0x0},
 432  	sm2P256FieldElement{0xA, 0x0, 0x1FFFFB00, 0x27FF, 0x0, 0x0, 0x0, 0xA000000, 0x0},
 433  	sm2P256FieldElement{0xC, 0x0, 0x1FFFFA00, 0x2FFF, 0x0, 0x0, 0x0, 0xC000000, 0x0},
 434  	sm2P256FieldElement{0xE, 0x0, 0x1FFFF900, 0x37FF, 0x0, 0x0, 0x0, 0xE000000, 0x0},
 435  	sm2P256FieldElement{0x10, 0x0, 0x1FFFF800, 0x3FFF, 0x0, 0x0, 0x0, 0x0, 0x01},
 436  }
 437  
 438  func sm2P256Scalar(b *sm2P256FieldElement, a int) {
 439  	sm2P256Mul(b, b, &sm2P256Factor[a])
 440  }
 441  
 442  // (x3, y3, z3) = (x1, y1, z1) + (x2, y2, z2)
 443  func sm2P256PointAdd(x1, y1, z1, x2, y2, z2, x3, y3, z3 *sm2P256FieldElement) {
 444  	var u1, u2, z22, z12, z23, z13, s1, s2, h, h2, r, r2, tm sm2P256FieldElement
 445  
 446  	if sm2P256ToBig(z1).Sign() == 0 {
 447  		sm2P256Dup(x3, x2)
 448  		sm2P256Dup(y3, y2)
 449  		sm2P256Dup(z3, z2)
 450  		return
 451  	}
 452  
 453  	if sm2P256ToBig(z2).Sign() == 0 {
 454  		sm2P256Dup(x3, x1)
 455  		sm2P256Dup(y3, y1)
 456  		sm2P256Dup(z3, z1)
 457  		return
 458  	}
 459  
 460  	sm2P256Square(&z12, z1) // z12 = z1 ^ 2
 461  	sm2P256Square(&z22, z2) // z22 = z2 ^ 2
 462  
 463  	sm2P256Mul(&z13, &z12, z1) // z13 = z1 ^ 3
 464  	sm2P256Mul(&z23, &z22, z2) // z23 = z2 ^ 3
 465  
 466  	sm2P256Mul(&u1, x1, &z22) // u1 = x1 * z2 ^ 2
 467  	sm2P256Mul(&u2, x2, &z12) // u2 = x2 * z1 ^ 2
 468  
 469  	sm2P256Mul(&s1, y1, &z23) // s1 = y1 * z2 ^ 3
 470  	sm2P256Mul(&s2, y2, &z13) // s2 = y2 * z1 ^ 3
 471  
 472  	if sm2P256ToBig(&u1).Cmp(sm2P256ToBig(&u2)) == 0 &&
 473  		sm2P256ToBig(&s1).Cmp(sm2P256ToBig(&s2)) == 0 {
 474  		sm2P256PointDouble(x1, y1, z1, x1, y1, z1)
 475  	}
 476  
 477  	sm2P256Sub(&h, &u2, &u1) // h = u2 - u1
 478  	sm2P256Sub(&r, &s2, &s1) // r = s2 - s1
 479  
 480  	sm2P256Square(&r2, &r) // r2 = r ^ 2
 481  	sm2P256Square(&h2, &h) // h2 = h ^ 2
 482  
 483  	sm2P256Mul(&tm, &h2, &h) // tm = h ^ 3
 484  	sm2P256Sub(x3, &r2, &tm)
 485  	sm2P256Mul(&tm, &u1, &h2)
 486  	sm2P256Scalar(&tm, 2)   // tm = 2 * (u1 * h ^ 2)
 487  	sm2P256Sub(x3, x3, &tm) // x3 = r ^ 2 - h ^ 3 - 2 * u1 * h ^ 2
 488  
 489  	sm2P256Mul(&tm, &u1, &h2) // tm = u1 * h ^ 2
 490  	sm2P256Sub(&tm, &tm, x3)  // tm = u1 * h ^ 2 - x3
 491  	sm2P256Mul(y3, &r, &tm)
 492  	sm2P256Mul(&tm, &h2, &h)  // tm = h ^ 3
 493  	sm2P256Mul(&tm, &tm, &s1) // tm = s1 * h ^ 3
 494  	sm2P256Sub(y3, y3, &tm)   // y3 = r * (u1 * h ^ 2 - x3) - s1 * h ^ 3
 495  
 496  	sm2P256Mul(z3, z1, z2)
 497  	sm2P256Mul(z3, z3, &h) // z3 = z1 * z3 * h
 498  }
 499  
 500  // (x3, y3, z3) = (x1, y1, z1)- (x2, y2, z2)
 501  func sm2P256PointSub(x1, y1, z1, x2, y2, z2, x3, y3, z3 *sm2P256FieldElement) {
 502  	var u1, u2, z22, z12, z23, z13, s1, s2, h, h2, r, r2, tm sm2P256FieldElement
 503  	y:=sm2P256ToBig(y2)
 504  	zero:=new(big.Int).SetInt64(0)
 505  	y.Sub(zero,y)
 506  	sm2P256FromBig(y2,y)
 507  
 508  	if sm2P256ToBig(z1).Sign() == 0 {
 509  		sm2P256Dup(x3, x2)
 510  		sm2P256Dup(y3, y2)
 511  		sm2P256Dup(z3, z2)
 512  		return
 513  	}
 514  
 515  	if sm2P256ToBig(z2).Sign() == 0 {
 516  		sm2P256Dup(x3, x1)
 517  		sm2P256Dup(y3, y1)
 518  		sm2P256Dup(z3, z1)
 519  		return
 520  	}
 521  
 522  	sm2P256Square(&z12, z1) // z12 = z1 ^ 2
 523  	sm2P256Square(&z22, z2) // z22 = z2 ^ 2
 524  
 525  	sm2P256Mul(&z13, &z12, z1) // z13 = z1 ^ 3
 526  	sm2P256Mul(&z23, &z22, z2) // z23 = z2 ^ 3
 527  
 528  	sm2P256Mul(&u1, x1, &z22) // u1 = x1 * z2 ^ 2
 529  	sm2P256Mul(&u2, x2, &z12) // u2 = x2 * z1 ^ 2
 530  
 531  	sm2P256Mul(&s1, y1, &z23) // s1 = y1 * z2 ^ 3
 532  	sm2P256Mul(&s2, y2, &z13) // s2 = y2 * z1 ^ 3
 533  
 534  	if sm2P256ToBig(&u1).Cmp(sm2P256ToBig(&u2)) == 0 &&
 535  		sm2P256ToBig(&s1).Cmp(sm2P256ToBig(&s2)) == 0 {
 536  		sm2P256PointDouble(x1, y1, z1, x1, y1, z1)
 537  	}
 538  
 539  	sm2P256Sub(&h, &u2, &u1) // h = u2 - u1
 540  	sm2P256Sub(&r, &s2, &s1) // r = s2 - s1
 541  
 542  	sm2P256Square(&r2, &r) // r2 = r ^ 2
 543  	sm2P256Square(&h2, &h) // h2 = h ^ 2
 544  
 545  	sm2P256Mul(&tm, &h2, &h) // tm = h ^ 3
 546  	sm2P256Sub(x3, &r2, &tm)
 547  	sm2P256Mul(&tm, &u1, &h2)
 548  	sm2P256Scalar(&tm, 2)   // tm = 2 * (u1 * h ^ 2)
 549  	sm2P256Sub(x3, x3, &tm) // x3 = r ^ 2 - h ^ 3 - 2 * u1 * h ^ 2
 550  
 551  	sm2P256Mul(&tm, &u1, &h2) // tm = u1 * h ^ 2
 552  	sm2P256Sub(&tm, &tm, x3)  // tm = u1 * h ^ 2 - x3
 553  	sm2P256Mul(y3, &r, &tm)
 554  	sm2P256Mul(&tm, &h2, &h)  // tm = h ^ 3
 555  	sm2P256Mul(&tm, &tm, &s1) // tm = s1 * h ^ 3
 556  	sm2P256Sub(y3, y3, &tm)   // y3 = r * (u1 * h ^ 2 - x3) - s1 * h ^ 3
 557  
 558  	sm2P256Mul(z3, z1, z2)
 559  	sm2P256Mul(z3, z3, &h) // z3 = z1 * z3 * h
 560  }
 561  
 562  func sm2P256PointDouble(x3, y3, z3, x, y, z *sm2P256FieldElement) {
 563  	var s, m, m2, x2, y2, z2, z4, y4, az4 sm2P256FieldElement
 564  
 565  	sm2P256Square(&x2, x) // x2 = x ^ 2
 566  	sm2P256Square(&y2, y) // y2 = y ^ 2
 567  	sm2P256Square(&z2, z) // z2 = z ^ 2
 568  
 569  	sm2P256Square(&z4, z)   // z4 = z ^ 2
 570  	sm2P256Mul(&z4, &z4, z) // z4 = z ^ 3
 571  	sm2P256Mul(&z4, &z4, z) // z4 = z ^ 4
 572  
 573  	sm2P256Square(&y4, y)   // y4 = y ^ 2
 574  	sm2P256Mul(&y4, &y4, y) // y4 = y ^ 3
 575  	sm2P256Mul(&y4, &y4, y) // y4 = y ^ 4
 576  	sm2P256Scalar(&y4, 8)   // y4 = 8 * y ^ 4
 577  
 578  	sm2P256Mul(&s, x, &y2)
 579  	sm2P256Scalar(&s, 4) // s = 4 * x * y ^ 2
 580  
 581  	sm2P256Dup(&m, &x2)
 582  	sm2P256Scalar(&m, 3)
 583  	sm2P256Mul(&az4, &sm2P256.a, &z4)
 584  	sm2P256Add(&m, &m, &az4) // m = 3 * x ^ 2 + a * z ^ 4
 585  
 586  	sm2P256Square(&m2, &m) // m2 = m ^ 2
 587  
 588  	sm2P256Add(z3, y, z)
 589  	sm2P256Square(z3, z3)
 590  	sm2P256Sub(z3, z3, &z2)
 591  	sm2P256Sub(z3, z3, &y2) // z' = (y + z) ^2 - z ^ 2 - y ^ 2
 592  
 593  	sm2P256Sub(x3, &m2, &s)
 594  	sm2P256Sub(x3, x3, &s) // x' = m2 - 2 * s
 595  
 596  	sm2P256Sub(y3, &s, x3)
 597  	sm2P256Mul(y3, y3, &m)
 598  	sm2P256Sub(y3, y3, &y4) // y' = m * (s - x') - 8 * y ^ 4
 599  }
 600  
// sm2P256Zero31 is 0 mod p. sm2P256Sub adds it limb by limb to a - b before
// the carries are propagated.
// NOTE(review): presumably the limbs are large enough that the per-limb
// difference never wraps below zero — confirm against the limb bounds.
var sm2P256Zero31 = sm2P256FieldElement{0x7FFFFFF8, 0x3FFFFFFC, 0x800003FC, 0x3FFFDFFC, 0x7FFFFFFC, 0x3FFFFFFC, 0x7FFFFFFC, 0x37FFFFFC, 0x7FFFFFFC}
 603  
 604  // c = a + b
 605  func sm2P256Add(c, a, b *sm2P256FieldElement) {
 606  	carry := uint32(0)
 607  	for i := 0; ; i++ {
 608  		c[i] = a[i] + b[i]
 609  		c[i] += carry
 610  		carry = c[i] >> 29
 611  		c[i] &= bottom29Bits
 612  		i++
 613  		if i == 9 {
 614  			break
 615  		}
 616  		c[i] = a[i] + b[i]
 617  		c[i] += carry
 618  		carry = c[i] >> 28
 619  		c[i] &= bottom28Bits
 620  	}
 621  	sm2P256ReduceCarry(c, carry)
 622  }
 623  
 624  // c = a - b
 625  func sm2P256Sub(c, a, b *sm2P256FieldElement) {
 626  	var carry uint32
 627  
 628  	for i := 0; ; i++ {
 629  		c[i] = a[i] - b[i]
 630  		c[i] += sm2P256Zero31[i]
 631  		c[i] += carry
 632  		carry = c[i] >> 29
 633  		c[i] &= bottom29Bits
 634  		i++
 635  		if i == 9 {
 636  			break
 637  		}
 638  		c[i] = a[i] - b[i]
 639  		c[i] += sm2P256Zero31[i]
 640  		c[i] += carry
 641  		carry = c[i] >> 28
 642  		c[i] &= bottom28Bits
 643  	}
 644  	sm2P256ReduceCarry(c, carry)
 645  }
 646  
// sm2P256Mul sets c = a * b.
//
// The nine limbs of a and b are multiplied schoolbook style into seventeen
// 64-bit columns, where tmp[k] collects every product a[i]*b[j] with
// i + j == k. A product of two odd-indexed limbs is doubled (<<1): odd limbs
// are 28 bits wide while even limbs are 29 bits wide, so such a product sits
// one bit above the position of its (even) column. The columns are then
// reduced to a field element by sm2P256ReduceDegree.
func sm2P256Mul(c, a, b *sm2P256FieldElement) {
	var tmp sm2P256LargeFieldElement

	tmp[0] = uint64(a[0]) * uint64(b[0])
	tmp[1] = uint64(a[0])*(uint64(b[1])<<0) +
		uint64(a[1])*(uint64(b[0])<<0)
	tmp[2] = uint64(a[0])*(uint64(b[2])<<0) +
		uint64(a[1])*(uint64(b[1])<<1) +
		uint64(a[2])*(uint64(b[0])<<0)
	tmp[3] = uint64(a[0])*(uint64(b[3])<<0) +
		uint64(a[1])*(uint64(b[2])<<0) +
		uint64(a[2])*(uint64(b[1])<<0) +
		uint64(a[3])*(uint64(b[0])<<0)
	tmp[4] = uint64(a[0])*(uint64(b[4])<<0) +
		uint64(a[1])*(uint64(b[3])<<1) +
		uint64(a[2])*(uint64(b[2])<<0) +
		uint64(a[3])*(uint64(b[1])<<1) +
		uint64(a[4])*(uint64(b[0])<<0)
	tmp[5] = uint64(a[0])*(uint64(b[5])<<0) +
		uint64(a[1])*(uint64(b[4])<<0) +
		uint64(a[2])*(uint64(b[3])<<0) +
		uint64(a[3])*(uint64(b[2])<<0) +
		uint64(a[4])*(uint64(b[1])<<0) +
		uint64(a[5])*(uint64(b[0])<<0)
	tmp[6] = uint64(a[0])*(uint64(b[6])<<0) +
		uint64(a[1])*(uint64(b[5])<<1) +
		uint64(a[2])*(uint64(b[4])<<0) +
		uint64(a[3])*(uint64(b[3])<<1) +
		uint64(a[4])*(uint64(b[2])<<0) +
		uint64(a[5])*(uint64(b[1])<<1) +
		uint64(a[6])*(uint64(b[0])<<0)
	tmp[7] = uint64(a[0])*(uint64(b[7])<<0) +
		uint64(a[1])*(uint64(b[6])<<0) +
		uint64(a[2])*(uint64(b[5])<<0) +
		uint64(a[3])*(uint64(b[4])<<0) +
		uint64(a[4])*(uint64(b[3])<<0) +
		uint64(a[5])*(uint64(b[2])<<0) +
		uint64(a[6])*(uint64(b[1])<<0) +
		uint64(a[7])*(uint64(b[0])<<0)
	// tmp[8] has the greatest value but doesn't overflow. See logic in
	// sm2P256Square.
	tmp[8] = uint64(a[0])*(uint64(b[8])<<0) +
		uint64(a[1])*(uint64(b[7])<<1) +
		uint64(a[2])*(uint64(b[6])<<0) +
		uint64(a[3])*(uint64(b[5])<<1) +
		uint64(a[4])*(uint64(b[4])<<0) +
		uint64(a[5])*(uint64(b[3])<<1) +
		uint64(a[6])*(uint64(b[2])<<0) +
		uint64(a[7])*(uint64(b[1])<<1) +
		uint64(a[8])*(uint64(b[0])<<0)
	tmp[9] = uint64(a[1])*(uint64(b[8])<<0) +
		uint64(a[2])*(uint64(b[7])<<0) +
		uint64(a[3])*(uint64(b[6])<<0) +
		uint64(a[4])*(uint64(b[5])<<0) +
		uint64(a[5])*(uint64(b[4])<<0) +
		uint64(a[6])*(uint64(b[3])<<0) +
		uint64(a[7])*(uint64(b[2])<<0) +
		uint64(a[8])*(uint64(b[1])<<0)
	tmp[10] = uint64(a[2])*(uint64(b[8])<<0) +
		uint64(a[3])*(uint64(b[7])<<1) +
		uint64(a[4])*(uint64(b[6])<<0) +
		uint64(a[5])*(uint64(b[5])<<1) +
		uint64(a[6])*(uint64(b[4])<<0) +
		uint64(a[7])*(uint64(b[3])<<1) +
		uint64(a[8])*(uint64(b[2])<<0)
	tmp[11] = uint64(a[3])*(uint64(b[8])<<0) +
		uint64(a[4])*(uint64(b[7])<<0) +
		uint64(a[5])*(uint64(b[6])<<0) +
		uint64(a[6])*(uint64(b[5])<<0) +
		uint64(a[7])*(uint64(b[4])<<0) +
		uint64(a[8])*(uint64(b[3])<<0)
	tmp[12] = uint64(a[4])*(uint64(b[8])<<0) +
		uint64(a[5])*(uint64(b[7])<<1) +
		uint64(a[6])*(uint64(b[6])<<0) +
		uint64(a[7])*(uint64(b[5])<<1) +
		uint64(a[8])*(uint64(b[4])<<0)
	tmp[13] = uint64(a[5])*(uint64(b[8])<<0) +
		uint64(a[6])*(uint64(b[7])<<0) +
		uint64(a[7])*(uint64(b[6])<<0) +
		uint64(a[8])*(uint64(b[5])<<0)
	tmp[14] = uint64(a[6])*(uint64(b[8])<<0) +
		uint64(a[7])*(uint64(b[7])<<1) +
		uint64(a[8])*(uint64(b[6])<<0)
	tmp[15] = uint64(a[7])*(uint64(b[8])<<0) +
		uint64(a[8])*(uint64(b[7])<<0)
	tmp[16] = uint64(a[8]) * (uint64(b[8]) << 0)
	sm2P256ReduceDegree(c, &tmp)
}
 736  
// sm2P256Square sets b = a * a.
//
// This is the column-wise product of sm2P256Mul specialised for equal
// operands: each pair of symmetric cross terms a[i]*a[j] (i != j) is computed
// once and doubled with a shift, and a product of two odd-indexed limbs gets
// one further doubling because of the alternating 29/28-bit limb widths. The
// columns are reduced to a field element by sm2P256ReduceDegree.
func sm2P256Square(b, a *sm2P256FieldElement) {
	var tmp sm2P256LargeFieldElement

	tmp[0] = uint64(a[0]) * uint64(a[0])
	tmp[1] = uint64(a[0]) * (uint64(a[1]) << 1)
	tmp[2] = uint64(a[0])*(uint64(a[2])<<1) +
		uint64(a[1])*(uint64(a[1])<<1)
	tmp[3] = uint64(a[0])*(uint64(a[3])<<1) +
		uint64(a[1])*(uint64(a[2])<<1)
	tmp[4] = uint64(a[0])*(uint64(a[4])<<1) +
		uint64(a[1])*(uint64(a[3])<<2) +
		uint64(a[2])*uint64(a[2])
	tmp[5] = uint64(a[0])*(uint64(a[5])<<1) +
		uint64(a[1])*(uint64(a[4])<<1) +
		uint64(a[2])*(uint64(a[3])<<1)
	tmp[6] = uint64(a[0])*(uint64(a[6])<<1) +
		uint64(a[1])*(uint64(a[5])<<2) +
		uint64(a[2])*(uint64(a[4])<<1) +
		uint64(a[3])*(uint64(a[3])<<1)
	tmp[7] = uint64(a[0])*(uint64(a[7])<<1) +
		uint64(a[1])*(uint64(a[6])<<1) +
		uint64(a[2])*(uint64(a[5])<<1) +
		uint64(a[3])*(uint64(a[4])<<1)
	// tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
	// which is < 2**64 as required.
	tmp[8] = uint64(a[0])*(uint64(a[8])<<1) +
		uint64(a[1])*(uint64(a[7])<<2) +
		uint64(a[2])*(uint64(a[6])<<1) +
		uint64(a[3])*(uint64(a[5])<<2) +
		uint64(a[4])*uint64(a[4])
	tmp[9] = uint64(a[1])*(uint64(a[8])<<1) +
		uint64(a[2])*(uint64(a[7])<<1) +
		uint64(a[3])*(uint64(a[6])<<1) +
		uint64(a[4])*(uint64(a[5])<<1)
	tmp[10] = uint64(a[2])*(uint64(a[8])<<1) +
		uint64(a[3])*(uint64(a[7])<<2) +
		uint64(a[4])*(uint64(a[6])<<1) +
		uint64(a[5])*(uint64(a[5])<<1)
	tmp[11] = uint64(a[3])*(uint64(a[8])<<1) +
		uint64(a[4])*(uint64(a[7])<<1) +
		uint64(a[5])*(uint64(a[6])<<1)
	tmp[12] = uint64(a[4])*(uint64(a[8])<<1) +
		uint64(a[5])*(uint64(a[7])<<2) +
		uint64(a[6])*uint64(a[6])
	tmp[13] = uint64(a[5])*(uint64(a[8])<<1) +
		uint64(a[6])*(uint64(a[7])<<1)
	tmp[14] = uint64(a[6])*(uint64(a[8])<<1) +
		uint64(a[7])*(uint64(a[7])<<1)
	tmp[15] = uint64(a[7]) * (uint64(a[8]) << 1)
	tmp[16] = uint64(a[8]) * uint64(a[8])
	sm2P256ReduceDegree(b, &tmp)
}
 790  
 791  // nonZeroToAllOnes returns:
 792  //   0xffffffff for 0 < x <= 2**31
 793  //   0 for x == 0 or x > 2**31.
 794  func nonZeroToAllOnes(x uint32) uint32 {
 795  	return ((x - 1) >> 31) - 1
 796  }
 797  
 798  var sm2P256Carry = [8 * 9]uint32{
 799  	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
 800  	0x2, 0x0, 0x1FFFFF00, 0x7FF, 0x0, 0x0, 0x0, 0x2000000, 0x0,
 801  	0x4, 0x0, 0x1FFFFE00, 0xFFF, 0x0, 0x0, 0x0, 0x4000000, 0x0,
 802  	0x6, 0x0, 0x1FFFFD00, 0x17FF, 0x0, 0x0, 0x0, 0x6000000, 0x0,
 803  	0x8, 0x0, 0x1FFFFC00, 0x1FFF, 0x0, 0x0, 0x0, 0x8000000, 0x0,
 804  	0xA, 0x0, 0x1FFFFB00, 0x27FF, 0x0, 0x0, 0x0, 0xA000000, 0x0,
 805  	0xC, 0x0, 0x1FFFFA00, 0x2FFF, 0x0, 0x0, 0x0, 0xC000000, 0x0,
 806  	0xE, 0x0, 0x1FFFF900, 0x37FF, 0x0, 0x0, 0x0, 0xE000000, 0x0,
 807  }
 808  
 809  // carry < 2 ^ 3
 810  func sm2P256ReduceCarry(a *sm2P256FieldElement, carry uint32) {
 811  	a[0] += sm2P256Carry[carry*9+0]
 812  	a[2] += sm2P256Carry[carry*9+2]
 813  	a[3] += sm2P256Carry[carry*9+3]
 814  	a[7] += sm2P256Carry[carry*9+7]
 815  }
 816  
 817  
 818  func sm2P256ReduceDegree(a *sm2P256FieldElement, b *sm2P256LargeFieldElement) {
 819  	var tmp [18]uint32
 820  	var carry, x, xMask uint32
 821  
 822  	// tmp
 823  	// 0  | 1  | 2  | 3  | 4  | 5  | 6  | 7  | 8  |  9 | 10 ...
 824  	// 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 ...
 825  	tmp[0] = uint32(b[0]) & bottom29Bits
 826  	tmp[1] = uint32(b[0]) >> 29
 827  	tmp[1] |= (uint32(b[0]>>32) << 3) & bottom28Bits
 828  	tmp[1] += uint32(b[1]) & bottom28Bits
 829  	carry = tmp[1] >> 28
 830  	tmp[1] &= bottom28Bits
 831  	for i := 2; i < 17; i++ {
 832  		tmp[i] = (uint32(b[i-2] >> 32)) >> 25
 833  		tmp[i] += (uint32(b[i-1])) >> 28
 834  		tmp[i] += (uint32(b[i-1]>>32) << 4) & bottom29Bits
 835  		tmp[i] += uint32(b[i]) & bottom29Bits
 836  		tmp[i] += carry
 837  		carry = tmp[i] >> 29
 838  		tmp[i] &= bottom29Bits
 839  
 840  		i++
 841  		if i == 17 {
 842  			break
 843  		}
 844  		tmp[i] = uint32(b[i-2]>>32) >> 25
 845  		tmp[i] += uint32(b[i-1]) >> 29
 846  		tmp[i] += ((uint32(b[i-1] >> 32)) << 3) & bottom28Bits
 847  		tmp[i] += uint32(b[i]) & bottom28Bits
 848  		tmp[i] += carry
 849  		carry = tmp[i] >> 28
 850  		tmp[i] &= bottom28Bits
 851  	}
 852  	tmp[17] = uint32(b[15]>>32) >> 25
 853  	tmp[17] += uint32(b[16]) >> 29
 854  	tmp[17] += uint32(b[16]>>32) << 3
 855  	tmp[17] += carry
 856  
 857  	for i := 0; ; i += 2 {
 858  
 859  		tmp[i+1] += tmp[i] >> 29
 860  		x = tmp[i] & bottom29Bits
 861  		tmp[i] = 0
 862  		if x > 0 {
 863  			set4 := uint32(0)
 864  			set7 := uint32(0)
 865  			xMask = nonZeroToAllOnes(x)
 866  			tmp[i+2] += (x << 7) & bottom29Bits
 867  			tmp[i+3] += x >> 22
 868  			if tmp[i+3] < 0x10000000 {
 869  				set4 = 1
 870  				tmp[i+3] += 0x10000000 & xMask
 871  				tmp[i+3] -= (x << 10) & bottom28Bits
 872  			} else {
 873  				tmp[i+3] -= (x << 10) & bottom28Bits
 874  			}
 875  			if tmp[i+4] < 0x20000000 {
 876  				tmp[i+4] += 0x20000000 & xMask
 877  				tmp[i+4] -= set4 // 借位
 878  				tmp[i+4] -= x >> 18
 879  				if tmp[i+5] < 0x10000000 {
 880  					tmp[i+5] += 0x10000000 & xMask
 881  					tmp[i+5] -= 1 // 借位
 882  					if tmp[i+6] < 0x20000000 {
 883  						set7 = 1
 884  						tmp[i+6] += 0x20000000 & xMask
 885  						tmp[i+6] -= 1 // 借位
 886  					} else {
 887  						tmp[i+6] -= 1 // 借位
 888  					}
 889  				} else {
 890  					tmp[i+5] -= 1
 891  				}
 892  			} else {
 893  				tmp[i+4] -= set4 // 借位
 894  				tmp[i+4] -= x >> 18
 895  			}
 896  			if tmp[i+7] < 0x10000000 {
 897  				tmp[i+7] += 0x10000000 & xMask
 898  				tmp[i+7] -= set7
 899  				tmp[i+7] -= (x << 24) & bottom28Bits
 900  				tmp[i+8] += (x << 28) & bottom29Bits
 901  				if tmp[i+8] < 0x20000000 {
 902  					tmp[i+8] += 0x20000000 & xMask
 903  					tmp[i+8] -= 1
 904  					tmp[i+8] -= x >> 4
 905  					tmp[i+9] += ((x >> 1) - 1) & xMask
 906  				} else {
 907  					tmp[i+8] -= 1
 908  					tmp[i+8] -= x >> 4
 909  					tmp[i+9] += (x >> 1) & xMask
 910  				}
 911  			} else {
 912  				tmp[i+7] -= set7 // 借位
 913  				tmp[i+7] -= (x << 24) & bottom28Bits
 914  				tmp[i+8] += (x << 28) & bottom29Bits
 915  				if tmp[i+8] < 0x20000000 {
 916  					tmp[i+8] += 0x20000000 & xMask
 917  					tmp[i+8] -= x >> 4
 918  					tmp[i+9] += ((x >> 1) - 1) & xMask
 919  				} else {
 920  					tmp[i+8] -= x >> 4
 921  					tmp[i+9] += (x >> 1) & xMask
 922  				}
 923  			}
 924  
 925  		}
 926  
 927  		if i+1 == 9 {
 928  			break
 929  		}
 930  
 931  		tmp[i+2] += tmp[i+1] >> 28
 932  		x = tmp[i+1] & bottom28Bits
 933  		tmp[i+1] = 0
 934  		if x > 0 {
 935  			set5 := uint32(0)
 936  			set8 := uint32(0)
 937  			set9 := uint32(0)
 938  			xMask = nonZeroToAllOnes(x)
 939  			tmp[i+3] += (x << 7) & bottom28Bits
 940  			tmp[i+4] += x >> 21
 941  			if tmp[i+4] < 0x20000000 {
 942  				set5 = 1
 943  				tmp[i+4] += 0x20000000 & xMask
 944  				tmp[i+4] -= (x << 11) & bottom29Bits
 945  			} else {
 946  				tmp[i+4] -= (x << 11) & bottom29Bits
 947  			}
 948  			if tmp[i+5] < 0x10000000 {
 949  				tmp[i+5] += 0x10000000 & xMask
 950  				tmp[i+5] -= set5 // 借位
 951  				tmp[i+5] -= x >> 18
 952  				if tmp[i+6] < 0x20000000 {
 953  					tmp[i+6] += 0x20000000 & xMask
 954  					tmp[i+6] -= 1 // 借位
 955  					if tmp[i+7] < 0x10000000 {
 956  						set8 = 1
 957  						tmp[i+7] += 0x10000000 & xMask
 958  						tmp[i+7] -= 1 // 借位
 959  					} else {
 960  						tmp[i+7] -= 1 // 借位
 961  					}
 962  				} else {
 963  					tmp[i+6] -= 1 // 借位
 964  				}
 965  			} else {
 966  				tmp[i+5] -= set5 // 借位
 967  				tmp[i+5] -= x >> 18
 968  			}
 969  			if tmp[i+8] < 0x20000000 {
 970  				set9 = 1
 971  				tmp[i+8] += 0x20000000 & xMask
 972  				tmp[i+8] -= set8
 973  				tmp[i+8] -= (x << 25) & bottom29Bits
 974  			} else {
 975  				tmp[i+8] -= set8
 976  				tmp[i+8] -= (x << 25) & bottom29Bits
 977  			}
 978  			if tmp[i+9] < 0x10000000 {
 979  				tmp[i+9] += 0x10000000 & xMask
 980  				tmp[i+9] -= set9 // 借位
 981  				tmp[i+9] -= x >> 4
 982  				tmp[i+10] += (x - 1) & xMask
 983  			} else {
 984  				tmp[i+9] -= set9 // 借位
 985  				tmp[i+9] -= x >> 4
 986  				tmp[i+10] += x & xMask
 987  			}
 988  		}
 989  	}
 990  
 991  	carry = uint32(0)
 992  	for i := 0; i < 8; i++ {
 993  		a[i] = tmp[i+9]
 994  		a[i] += carry
 995  		a[i] += (tmp[i+10] << 28) & bottom29Bits
 996  		carry = a[i] >> 29
 997  		a[i] &= bottom29Bits
 998  
 999  		i++
1000  		a[i] = tmp[i+9] >> 1
1001  		a[i] += carry
1002  		carry = a[i] >> 28
1003  		a[i] &= bottom28Bits
1004  	}
1005  	a[8] = tmp[17]
1006  	a[8] += carry
1007  	carry = a[8] >> 29
1008  	a[8] &= bottom29Bits
1009  	sm2P256ReduceCarry(a, carry)
1010  }
1011  
1012  // b = a
1013  func sm2P256Dup(b, a *sm2P256FieldElement) {
1014  	*b = *a
1015  }
1016  
1017  // X = a * R mod P
1018  func sm2P256FromBig(X *sm2P256FieldElement, a *big.Int) {
1019  	x := new(big.Int).Lsh(a, 257)
1020  	x.Mod(x, sm2P256.P)
1021  	for i := 0; i < 9; i++ {
1022  		if bits := x.Bits(); len(bits) > 0 {
1023  			X[i] = uint32(bits[0]) & bottom29Bits
1024  		} else {
1025  			X[i] = 0
1026  		}
1027  		x.Rsh(x, 29)
1028  		i++
1029  		if i == 9 {
1030  			break
1031  		}
1032  		if bits := x.Bits(); len(bits) > 0 {
1033  			X[i] = uint32(bits[0]) & bottom28Bits
1034  		} else {
1035  			X[i] = 0
1036  		}
1037  		x.Rsh(x, 28)
1038  	}
1039  }
1040  
1041  // X = r * R mod P
1042  // r = X * R' mod P
1043  func sm2P256ToBig(X *sm2P256FieldElement) *big.Int {
1044  	r, tm := new(big.Int), new(big.Int)
1045  	r.SetInt64(int64(X[8]))
1046  	for i := 7; i >= 0; i-- {
1047  		if (i & 1) == 0 {
1048  			r.Lsh(r, 29)
1049  		} else {
1050  			r.Lsh(r, 28)
1051  		}
1052  		tm.SetInt64(int64(X[i]))
1053  		r.Add(r, tm)
1054  	}
1055  	r.Mul(r, sm2P256.RInverse)
1056  	r.Mod(r, sm2P256.P)
1057  	return r
1058  }
// WNafReversed returns a newly allocated copy of wnaf with its digits in
// reverse order. The input slice is left untouched; a nil or empty input
// yields an empty, non-nil slice.
func WNafReversed(wnaf []int8) []int8 {
	n := len(wnaf)
	reversed := make([]int8, n)
	for src, dst := 0, n-1; src < n; src, dst = src+1, dst-1 {
		reversed[dst] = wnaf[src]
	}
	return reversed
}
1066  func sm2GenrateWNaf(b []byte) []int8 {
1067  	n:= new(big.Int).SetBytes(b)
1068  	var k *big.Int
1069  	if n.Cmp(sm2P256.N) >= 0 {
1070  		n.Mod(n, sm2P256.N)
1071  		k = n
1072  	} else {
1073  		k = n
1074  	}
1075  	wnaf := make([]int8, k.BitLen()+1, k.BitLen()+1)
1076  	if k.Sign() == 0 {
1077  		return wnaf
1078  	}
1079  	var width, pow2, sign int
1080  	width, pow2, sign = 4, 16, 8
1081  	var mask int64 = 15
1082  	var carry bool
1083  	var length, pos int
1084  	for pos <= k.BitLen() {
1085  		if k.Bit(pos) == boolToUint(carry) {
1086  			pos++
1087  			continue
1088  		}
1089  		k.Rsh(k, uint(pos))
1090  		var digit int
1091  		digit = int(k.Int64() & mask)
1092  		if carry {
1093  			digit++
1094  		}
1095  		carry = (digit & sign) != 0
1096  		if carry {
1097  			digit -= pow2
1098  		}
1099  		length += pos
1100  		wnaf[length] = int8(digit)
1101  		pos = int(width)
1102  	}
1103  	if len(wnaf) > length+1 {
1104  		t := make([]int8, length+1, length+1)
1105  		copy(t, wnaf[0:length+1])
1106  		wnaf = t
1107  	}
1108  	return wnaf
1109  }
// boolToUint returns 1 when b is true and 0 when it is false.
func boolToUint(b bool) uint {
	var bit uint
	if b {
		bit = 1
	}
	return bit
}
// abs returns the absolute value of a as a uint32.
//
// The value is widened to int32 before it is negated: negating -128 in
// int8 overflows and stays -128, which would convert to 0xFFFFFF80 rather
// than 128.
func abs(a int8) uint32 {
	v := int32(a)
	if v < 0 {
		v = -v
	}
	return uint32(v)
}
1122  
1123  func sm2P256ScalarMult(xOut, yOut, zOut, x, y *sm2P256FieldElement, scalar []int8) {
1124  	var precomp [16][3]sm2P256FieldElement
1125  	var px, py, pz, tx, ty, tz sm2P256FieldElement
1126  	var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32
1127  
1128  	// We precompute 0,1,2,... times {x,y}.
1129  	precomp[1][0] = *x
1130  	precomp[1][1] = *y
1131  	precomp[1][2] = sm2P256Factor[1]
1132  
1133  	for i := 2; i < 8; i += 2 {
1134  		sm2P256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
1135  		sm2P256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
1136  	}
1137  
1138  	for i := range xOut {
1139  		xOut[i] = 0
1140  	}
1141  	for i := range yOut {
1142  		yOut[i] = 0
1143  	}
1144  	for i := range zOut {
1145  		zOut[i] = 0
1146  	}
1147  	nIsInfinityMask = ^uint32(0)
1148  	var zeroes int16
1149  	for i := 0; i<len(scalar); i++ {
1150  		if scalar[i] ==0{
1151  			zeroes++
1152  			continue
1153  		}
1154  		if(zeroes>0){
1155  			for  ;zeroes>0;zeroes-- {
1156  				sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1157  			}
1158  		}
1159  		index = abs(scalar[i])
1160  		sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1161  		sm2P256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
1162  		if scalar[i] > 0 {
1163  			sm2P256PointAdd(xOut, yOut, zOut, &px, &py, &pz, &tx, &ty, &tz)
1164  		} else {
1165  			sm2P256PointSub(xOut, yOut, zOut, &px, &py, &pz, &tx, &ty, &tz)
1166  		}
1167  		sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
1168  		sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
1169  		sm2P256CopyConditional(zOut, &pz, nIsInfinityMask)
1170  		pIsNoninfiniteMask = nonZeroToAllOnes(index)
1171  		mask = pIsNoninfiniteMask & ^nIsInfinityMask
1172  		sm2P256CopyConditional(xOut, &tx, mask)
1173  		sm2P256CopyConditional(yOut, &ty, mask)
1174  		sm2P256CopyConditional(zOut, &tz, mask)
1175  		nIsInfinityMask &^= pIsNoninfiniteMask
1176  	}
1177  	if(zeroes>0){
1178  		for  ;zeroes>0;zeroes-- {
1179  			sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
1180  		}
1181  	}
1182  }
1183