nat_amd64_asm.mx raw
1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package main
6
7 import (
8 "strconv"
9
10 . "github.com/mmcloughlin/avo/build"
11 . "github.com/mmcloughlin/avo/operand"
12 . "github.com/mmcloughlin/avo/reg"
13 )
14
15 //go:generate go run . -out ../nat_amd64.s -pkg bigmod
16
17 func main() {
18 Package("crypto/internal/fips140/bigmod")
19 ConstraintExpr("!purego")
20
21 addMulVVW(1024)
22 addMulVVW(1536)
23 addMulVVW(2048)
24
25 Generate()
26 }
27
28 func addMulVVW(bits int) {
29 if bits%64 != 0 {
30 panic("bit size unsupported")
31 }
32
33 Implement("addMulVVW" + strconv.Itoa(bits))
34
35 CMPB(Mem{Symbol: Symbol{Name: "·supportADX"}, Base: StaticBase}, Imm(1))
36 JEQ(LabelRef("adx"))
37
38 z := Mem{Base: Load(Param("z"), GP64())}
39 x := Mem{Base: Load(Param("x"), GP64())}
40 y := Load(Param("y"), GP64())
41
42 carry := GP64()
43 XORQ(carry, carry) // zero out carry
44
45 for i := 0; i < bits/64; i++ {
46 Comment("Iteration " + strconv.Itoa(i))
47 hi, lo := RDX, RAX // implicit MULQ inputs and outputs
48 MOVQ(x.Offset(i*8), lo)
49 MULQ(y)
50 ADDQ(z.Offset(i*8), lo)
51 ADCQ(Imm(0), hi)
52 ADDQ(carry, lo)
53 ADCQ(Imm(0), hi)
54 MOVQ(hi, carry)
55 MOVQ(lo, z.Offset(i*8))
56 }
57
58 Store(carry, ReturnIndex(0))
59 RET()
60
61 Label("adx")
62
63 // The ADX strategy implements the following function, where c1 and c2 are
64 // the overflow and the carry flag respectively.
65 //
66 // func addMulVVW(z, x []uint, y uint) (carry uint) {
67 // var c1, c2 uint
68 // for i := range z {
69 // hi, lo := bits.Mul(x[i], y)
70 // lo, c1 = bits.Add(lo, z[i], c1)
71 // z[i], c2 = bits.Add(lo, carry, c2)
72 // carry = hi
73 // }
74 // return carry + c1 + c2
75 // }
76 //
77 // The loop is fully unrolled and the hi / carry registers are alternated
78 // instead of introducing a MOV.
79
80 z = Mem{Base: Load(Param("z"), GP64())}
81 x = Mem{Base: Load(Param("x"), GP64())}
82 Load(Param("y"), RDX) // implicit source of MULXQ
83
84 carry = GP64()
85 XORQ(carry, carry) // zero out carry
86 z0 := GP64()
87 XORQ(z0, z0) // unset flags and zero out z0
88
89 for i := 0; i < bits/64; i++ {
90 hi, lo := GP64(), GP64()
91
92 Comment("Iteration " + strconv.Itoa(i))
93 MULXQ(x.Offset(i*8), lo, hi)
94 ADCXQ(carry, lo)
95 ADOXQ(z.Offset(i*8), lo)
96 MOVQ(lo, z.Offset(i*8))
97
98 i++
99
100 Comment("Iteration " + strconv.Itoa(i))
101 MULXQ(x.Offset(i*8), lo, carry)
102 ADCXQ(hi, lo)
103 ADOXQ(z.Offset(i*8), lo)
104 MOVQ(lo, z.Offset(i*8))
105 }
106
107 Comment("Add back carry flags and return")
108 ADCXQ(z0, carry)
109 ADOXQ(z0, carry)
110
111 Store(carry, ReturnIndex(0))
112 RET()
113 }
114