sha1block_amd64_shani.mx raw
1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package main
6
7 import (
8 "fmt"
9
10 . "github.com/mmcloughlin/avo/build"
11 . "github.com/mmcloughlin/avo/operand"
12 . "github.com/mmcloughlin/avo/reg"
13 )
14
15 // Implement the SHA-1 block function using the Intel(R) SHA extensions
16 // (SHA1RNDS4, SHA1NEXTE, SHA1MSG1, and SHA1MSG2). This implementation requires
17 // the AVX, SHA, SSE2, SSE4.1, and SSSE3 extensions.
18 //
19 // Reference:
20 // S. Gulley, et al, "New Instructions Supporting the Secure Hash
21 // Algorithm on Intel® Architecture Processors", July 2013
22 // https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
23
24 func blockSHANI() {
25 Implement("blockSHANI")
26
27 digest := Load(Param("dig"), RDI)
28 data := Load(Param("p").Base(), RSI)
29 len := Load(Param("p").Len(), RDX)
30
31 abcd := XMM()
32 msg0, msg1, msg2, msg3 := XMM(), XMM(), XMM(), XMM()
33 e0, e1 := XMM(), XMM()
34 shufMask := XMM()
35
36 CMPQ(len, Imm(0))
37 JEQ(LabelRef("done"))
38 ADDQ(data, len)
39
40 stackPtr := GP64()
41 {
42 Comment("Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes")
43 local := AllocLocal(32 + 16)
44 LEAQ(local.Offset(15), stackPtr)
45 tmp := GP64()
46 MOVQ(U64(15), tmp)
47 NOTQ(tmp)
48 ANDQ(tmp, stackPtr)
49 }
50 e0_save := Mem{Base: stackPtr}
51 abcd_save := Mem{Base: stackPtr}.Offset(16)
52
53 Comment("Load initial hash state")
54 PINSRD(Imm(3), Mem{Base: digest}.Offset(16), e0)
55 VMOVDQU(Mem{Base: digest}, abcd)
56 PAND(upperMask(), e0)
57 PSHUFD(Imm(0x1b), abcd, abcd)
58
59 VMOVDQA(flipMask(), shufMask)
60
61 Label("loop")
62
63 Comment("Save ABCD and E working values")
64 VMOVDQA(e0, e0_save)
65 VMOVDQA(abcd, abcd_save)
66
67 Comment("Rounds 0-3")
68 VMOVDQU(Mem{Base: data}, msg0)
69 PSHUFB(shufMask, msg0)
70 PADDD(msg0, e0)
71 VMOVDQA(abcd, e1)
72 SHA1RNDS4(Imm(0), e0, abcd)
73
74 Comment("Rounds 4-7")
75 VMOVDQU(Mem{Base: data}.Offset(16), msg1)
76 PSHUFB(shufMask, msg1)
77 SHA1NEXTE(msg1, e1)
78 VMOVDQA(abcd, e0)
79 SHA1RNDS4(Imm(0), e1, abcd)
80 SHA1MSG1(msg1, msg0)
81
82 Comment("Rounds 8-11")
83 VMOVDQU(Mem{Base: data}.Offset(16*2), msg2)
84 PSHUFB(shufMask, msg2)
85 SHA1NEXTE(msg2, e0)
86 VMOVDQA(abcd, e1)
87 SHA1RNDS4(Imm(0), e0, abcd)
88 SHA1MSG1(msg2, msg1)
89 PXOR(msg2, msg0)
90
91 // Rounds 12 through 67 use the same repeated pattern, with e0 and e1 ping-ponging
92 // back and forth, and each of the msg temporaries moving up one every four rounds.
93 msgs := []VecVirtual{msg3, msg0, msg1, msg2}
94 for i := range 14 {
95 Comment(fmt.Sprintf("Rounds %d-%d", 12+(i*4), 12+(i*4)+3))
96 a, b := e1, e0
97 if i == 0 {
98 VMOVDQU(Mem{Base: data}.Offset(16*3), msg3)
99 PSHUFB(shufMask, msg3)
100 }
101 if i%2 == 1 {
102 a, b = e0, e1
103 }
104 imm := uint64((12 + i*4) / 20)
105
106 SHA1NEXTE(msgs[i%4], a)
107 VMOVDQA(abcd, b)
108 SHA1MSG2(msgs[i%4], msgs[(1+i)%4])
109 SHA1RNDS4(Imm(imm), a, abcd)
110 SHA1MSG1(msgs[i%4], msgs[(3+i)%4])
111 PXOR(msgs[i%4], msgs[(2+i)%4])
112 }
113
114 Comment("Rounds 68-71")
115 SHA1NEXTE(msg1, e1)
116 VMOVDQA(abcd, e0)
117 SHA1MSG2(msg1, msg2)
118 SHA1RNDS4(Imm(3), e1, abcd)
119 PXOR(msg1, msg3)
120
121 Comment("Rounds 72-75")
122 SHA1NEXTE(msg2, e0)
123 VMOVDQA(abcd, e1)
124 SHA1MSG2(msg2, msg3)
125 SHA1RNDS4(Imm(3), e0, abcd)
126
127 Comment("Rounds 76-79")
128 SHA1NEXTE(msg3, e1)
129 VMOVDQA(abcd, e0)
130 SHA1RNDS4(Imm(3), e1, abcd)
131
132 Comment("Add saved E and ABCD")
133 SHA1NEXTE(e0_save, e0)
134 PADDD(abcd_save, abcd)
135
136 Comment("Check if we are done, if not return to the loop")
137 ADDQ(Imm(64), data)
138 CMPQ(data, len)
139 JNE(LabelRef("loop"))
140
141 Comment("Write the hash state back to digest")
142 PSHUFD(Imm(0x1b), abcd, abcd)
143 VMOVDQU(abcd, Mem{Base: digest})
144 PEXTRD(Imm(3), e0, Mem{Base: digest}.Offset(16))
145
146 Label("done")
147 RET()
148 }
149
150 func flipMask() Mem {
151 mask := GLOBL("shuffle_mask", RODATA)
152 // 0x000102030405060708090a0b0c0d0e0f
153 DATA(0x00, U64(0x08090a0b0c0d0e0f))
154 DATA(0x08, U64(0x0001020304050607))
155 return mask
156 }
157
158 func upperMask() Mem {
159 mask := GLOBL("upper_mask", RODATA)
160 // 0xFFFFFFFF000000000000000000000000
161 DATA(0x00, U64(0x0000000000000000))
162 DATA(0x08, U64(0xFFFFFFFF00000000))
163 return mask
164 }
165