sha1block_amd64_shani.mx raw

   1  // Copyright 2024 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package main
   6  
   7  import (
   8  	"fmt"
   9  
  10  	. "github.com/mmcloughlin/avo/build"
  11  	. "github.com/mmcloughlin/avo/operand"
  12  	. "github.com/mmcloughlin/avo/reg"
  13  )
  14  
  15  // Implement the SHA-1 block function using the Intel(R) SHA extensions
  16  // (SHA1RNDS4, SHA1NEXTE, SHA1MSG1, and SHA1MSG2). This implementation requires
  17  // the AVX, SHA, SSE2, SSE4.1, and SSSE3 extensions.
  18  //
  19  // Reference:
  20  // S. Gulley, et al, "New Instructions Supporting the Secure Hash
  21  // Algorithm on Intel® Architecture Processors", July 2013
  22  // https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
  23  
  24  func blockSHANI() {
  25  	Implement("blockSHANI")
  26  
  27  	digest := Load(Param("dig"), RDI)
  28  	data := Load(Param("p").Base(), RSI)
  29  	len := Load(Param("p").Len(), RDX)
  30  
  31  	abcd := XMM()
  32  	msg0, msg1, msg2, msg3 := XMM(), XMM(), XMM(), XMM()
  33  	e0, e1 := XMM(), XMM()
  34  	shufMask := XMM()
  35  
  36  	CMPQ(len, Imm(0))
  37  	JEQ(LabelRef("done"))
  38  	ADDQ(data, len)
  39  
  40  	stackPtr := GP64()
  41  	{
  42  		Comment("Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes")
  43  		local := AllocLocal(32 + 16)
  44  		LEAQ(local.Offset(15), stackPtr)
  45  		tmp := GP64()
  46  		MOVQ(U64(15), tmp)
  47  		NOTQ(tmp)
  48  		ANDQ(tmp, stackPtr)
  49  	}
  50  	e0_save := Mem{Base: stackPtr}
  51  	abcd_save := Mem{Base: stackPtr}.Offset(16)
  52  
  53  	Comment("Load initial hash state")
  54  	PINSRD(Imm(3), Mem{Base: digest}.Offset(16), e0)
  55  	VMOVDQU(Mem{Base: digest}, abcd)
  56  	PAND(upperMask(), e0)
  57  	PSHUFD(Imm(0x1b), abcd, abcd)
  58  
  59  	VMOVDQA(flipMask(), shufMask)
  60  
  61  	Label("loop")
  62  
  63  	Comment("Save ABCD and E working values")
  64  	VMOVDQA(e0, e0_save)
  65  	VMOVDQA(abcd, abcd_save)
  66  
  67  	Comment("Rounds 0-3")
  68  	VMOVDQU(Mem{Base: data}, msg0)
  69  	PSHUFB(shufMask, msg0)
  70  	PADDD(msg0, e0)
  71  	VMOVDQA(abcd, e1)
  72  	SHA1RNDS4(Imm(0), e0, abcd)
  73  
  74  	Comment("Rounds 4-7")
  75  	VMOVDQU(Mem{Base: data}.Offset(16), msg1)
  76  	PSHUFB(shufMask, msg1)
  77  	SHA1NEXTE(msg1, e1)
  78  	VMOVDQA(abcd, e0)
  79  	SHA1RNDS4(Imm(0), e1, abcd)
  80  	SHA1MSG1(msg1, msg0)
  81  
  82  	Comment("Rounds 8-11")
  83  	VMOVDQU(Mem{Base: data}.Offset(16*2), msg2)
  84  	PSHUFB(shufMask, msg2)
  85  	SHA1NEXTE(msg2, e0)
  86  	VMOVDQA(abcd, e1)
  87  	SHA1RNDS4(Imm(0), e0, abcd)
  88  	SHA1MSG1(msg2, msg1)
  89  	PXOR(msg2, msg0)
  90  
  91  	// Rounds 12 through 67 use the same repeated pattern, with e0 and e1 ping-ponging
  92  	// back and forth, and each of the msg temporaries moving up one every four rounds.
  93  	msgs := []VecVirtual{msg3, msg0, msg1, msg2}
  94  	for i := range 14 {
  95  		Comment(fmt.Sprintf("Rounds %d-%d", 12+(i*4), 12+(i*4)+3))
  96  		a, b := e1, e0
  97  		if i == 0 {
  98  			VMOVDQU(Mem{Base: data}.Offset(16*3), msg3)
  99  			PSHUFB(shufMask, msg3)
 100  		}
 101  		if i%2 == 1 {
 102  			a, b = e0, e1
 103  		}
 104  		imm := uint64((12 + i*4) / 20)
 105  
 106  		SHA1NEXTE(msgs[i%4], a)
 107  		VMOVDQA(abcd, b)
 108  		SHA1MSG2(msgs[i%4], msgs[(1+i)%4])
 109  		SHA1RNDS4(Imm(imm), a, abcd)
 110  		SHA1MSG1(msgs[i%4], msgs[(3+i)%4])
 111  		PXOR(msgs[i%4], msgs[(2+i)%4])
 112  	}
 113  
 114  	Comment("Rounds 68-71")
 115  	SHA1NEXTE(msg1, e1)
 116  	VMOVDQA(abcd, e0)
 117  	SHA1MSG2(msg1, msg2)
 118  	SHA1RNDS4(Imm(3), e1, abcd)
 119  	PXOR(msg1, msg3)
 120  
 121  	Comment("Rounds 72-75")
 122  	SHA1NEXTE(msg2, e0)
 123  	VMOVDQA(abcd, e1)
 124  	SHA1MSG2(msg2, msg3)
 125  	SHA1RNDS4(Imm(3), e0, abcd)
 126  
 127  	Comment("Rounds 76-79")
 128  	SHA1NEXTE(msg3, e1)
 129  	VMOVDQA(abcd, e0)
 130  	SHA1RNDS4(Imm(3), e1, abcd)
 131  
 132  	Comment("Add saved E and ABCD")
 133  	SHA1NEXTE(e0_save, e0)
 134  	PADDD(abcd_save, abcd)
 135  
 136  	Comment("Check if we are done, if not return to the loop")
 137  	ADDQ(Imm(64), data)
 138  	CMPQ(data, len)
 139  	JNE(LabelRef("loop"))
 140  
 141  	Comment("Write the hash state back to digest")
 142  	PSHUFD(Imm(0x1b), abcd, abcd)
 143  	VMOVDQU(abcd, Mem{Base: digest})
 144  	PEXTRD(Imm(3), e0, Mem{Base: digest}.Offset(16))
 145  
 146  	Label("done")
 147  	RET()
 148  }
 149  
 150  func flipMask() Mem {
 151  	mask := GLOBL("shuffle_mask", RODATA)
 152  	// 0x000102030405060708090a0b0c0d0e0f
 153  	DATA(0x00, U64(0x08090a0b0c0d0e0f))
 154  	DATA(0x08, U64(0x0001020304050607))
 155  	return mask
 156  }
 157  
 158  func upperMask() Mem {
 159  	mask := GLOBL("upper_mask", RODATA)
 160  	// 0xFFFFFFFF000000000000000000000000
 161  	DATA(0x00, U64(0x0000000000000000))
 162  	DATA(0x08, U64(0xFFFFFFFF00000000))
 163  	return mask
 164  }
 165