nat_amd64_asm.mx raw

   1  // Copyright 2023 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package main
   6  
   7  import (
   8  	"strconv"
   9  
  10  	. "github.com/mmcloughlin/avo/build"
  11  	. "github.com/mmcloughlin/avo/operand"
  12  	. "github.com/mmcloughlin/avo/reg"
  13  )
  14  
  15  //go:generate go run . -out ../nat_amd64.s -pkg bigmod
  16  
  17  func main() {
  18  	Package("crypto/internal/fips140/bigmod")
  19  	ConstraintExpr("!purego")
  20  
  21  	addMulVVW(1024)
  22  	addMulVVW(1536)
  23  	addMulVVW(2048)
  24  
  25  	Generate()
  26  }
  27  
  28  func addMulVVW(bits int) {
  29  	if bits%64 != 0 {
  30  		panic("bit size unsupported")
  31  	}
  32  
  33  	Implement("addMulVVW" + strconv.Itoa(bits))
  34  
  35  	CMPB(Mem{Symbol: Symbol{Name: "·supportADX"}, Base: StaticBase}, Imm(1))
  36  	JEQ(LabelRef("adx"))
  37  
  38  	z := Mem{Base: Load(Param("z"), GP64())}
  39  	x := Mem{Base: Load(Param("x"), GP64())}
  40  	y := Load(Param("y"), GP64())
  41  
  42  	carry := GP64()
  43  	XORQ(carry, carry) // zero out carry
  44  
  45  	for i := 0; i < bits/64; i++ {
  46  		Comment("Iteration " + strconv.Itoa(i))
  47  		hi, lo := RDX, RAX // implicit MULQ inputs and outputs
  48  		MOVQ(x.Offset(i*8), lo)
  49  		MULQ(y)
  50  		ADDQ(z.Offset(i*8), lo)
  51  		ADCQ(Imm(0), hi)
  52  		ADDQ(carry, lo)
  53  		ADCQ(Imm(0), hi)
  54  		MOVQ(hi, carry)
  55  		MOVQ(lo, z.Offset(i*8))
  56  	}
  57  
  58  	Store(carry, ReturnIndex(0))
  59  	RET()
  60  
  61  	Label("adx")
  62  
  63  	// The ADX strategy implements the following function, where c1 and c2 are
  64  	// the overflow and the carry flag respectively.
  65  	//
  66  	//    func addMulVVW(z, x []uint, y uint) (carry uint) {
  67  	//        var c1, c2 uint
  68  	//        for i := range z {
  69  	//            hi, lo := bits.Mul(x[i], y)
  70  	//            lo, c1 = bits.Add(lo, z[i], c1)
  71  	//            z[i], c2 = bits.Add(lo, carry, c2)
  72  	//            carry = hi
  73  	//        }
  74  	//        return carry + c1 + c2
  75  	//    }
  76  	//
  77  	// The loop is fully unrolled and the hi / carry registers are alternated
  78  	// instead of introducing a MOV.
  79  
  80  	z = Mem{Base: Load(Param("z"), GP64())}
  81  	x = Mem{Base: Load(Param("x"), GP64())}
  82  	Load(Param("y"), RDX) // implicit source of MULXQ
  83  
  84  	carry = GP64()
  85  	XORQ(carry, carry) // zero out carry
  86  	z0 := GP64()
  87  	XORQ(z0, z0) // unset flags and zero out z0
  88  
  89  	for i := 0; i < bits/64; i++ {
  90  		hi, lo := GP64(), GP64()
  91  
  92  		Comment("Iteration " + strconv.Itoa(i))
  93  		MULXQ(x.Offset(i*8), lo, hi)
  94  		ADCXQ(carry, lo)
  95  		ADOXQ(z.Offset(i*8), lo)
  96  		MOVQ(lo, z.Offset(i*8))
  97  
  98  		i++
  99  
 100  		Comment("Iteration " + strconv.Itoa(i))
 101  		MULXQ(x.Offset(i*8), lo, carry)
 102  		ADCXQ(hi, lo)
 103  		ADOXQ(z.Offset(i*8), lo)
 104  		MOVQ(lo, z.Offset(i*8))
 105  	}
 106  
 107  	Comment("Add back carry flags and return")
 108  	ADCXQ(z0, carry)
 109  	ADOXQ(z0, carry)
 110  
 111  	Store(carry, ReturnIndex(0))
 112  	RET()
 113  }
 114