sha256block_arm64.s raw

   1  // Copyright 2017 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build !purego
   6  
   7  #include "textflag.h"
   8  // HASHUPDATE runs four SHA-256 rounds: V9 = W+K words in, V2/V3 = working state updated in place, V8 = copy of V2 read by SHA256H2 and refreshed for the next use.
   9  #define HASHUPDATE \
  10  	SHA256H	V9.S4, V3, V2 \
  11  	SHA256H2	V9.S4, V8, V3 \
  12  	VMOV	V2.B16, V8.B16
  13  // blockSHA2 updates the 32 bytes of hash state at dig with the message in p, one 64-byte block per blockloop pass, using the SHA256H/SHA256H2/SHA256SU0/SHA256SU1 instructions.
  14  // func blockSHA2(dig *Digest, p []byte)
  15  TEXT ·blockSHA2(SB),NOSPLIT,$0
  16  	MOVD	dig+0(FP), R0                              // R0 = address of the hash state
  17  	MOVD	p_base+8(FP), R1                           // R1 = address of the first message byte
  18  	MOVD	p_len+16(FP), R3                           // R3 = message length in bytes
  19  	MOVD	$·_K+0(SB), R2                             // R2 = address of the K constants table
  20  	VLD1	(R0), [V0.S4, V1.S4]                       // V0, V1 = current hash h(a,b,c,d,e,f,g,h)
  21  	VLD1.P	64(R2), [V16.S4, V17.S4, V18.S4, V19.S4]   // V16-V19 = K0-K15, R2 += 64
  22  	VLD1.P	64(R2), [V20.S4, V21.S4, V22.S4, V23.S4]   // V20-V23 = K16-K31, R2 += 64
  23  	VLD1.P	64(R2), [V24.S4, V25.S4, V26.S4, V27.S4]   // V24-V27 = K32-K47, R2 += 64
  24  	VLD1	(R2), [V28.S4, V29.S4, V30.S4, V31.S4]     // V28-V31 = K48-K63; all 64 4-byte K constants are now in registers
  25  // Per-block loop: V0/V1 carry the running hash, V16-V31 the K constants, R1 is the read pointer, R3 the bytes left.
  26  blockloop:
  27  
  28  	VLD1.P	16(R1), [V4.B16]                            // V4 = message bytes 0-15, R1 += 16
  29  	VLD1.P	16(R1), [V5.B16]                            // V5 = message bytes 16-31, R1 += 16
  30  	VLD1.P	16(R1), [V6.B16]                            // V6 = message bytes 32-47, R1 += 16
  31  	VLD1.P	16(R1), [V7.B16]                            // V7 = message bytes 48-63, R1 += 16
  32  	VMOV	V0.B16, V2.B16                              // V2 = working copy of V0, h(dcba); V0 keeps the value for the final add
  33  	VMOV	V1.B16, V3.B16                              // V3 = working copy of V1, h(hgfe); V1 keeps the value for the final add
  34  	VMOV	V2.B16, V8.B16                              // V8 = copy of V2 for the first HASHUPDATE (SHA256H2 reads V8)
  35  	VREV32	V4.B16, V4.B16                              // reverse the bytes in each 32-bit word (message words are big-endian)
  36  	VREV32	V5.B16, V5.B16
  37  	VREV32	V6.B16, V6.B16
  38  	VREV32	V7.B16, V7.B16
  39  // Rounds 0-47: each group forms W+K in V9, extends the message schedule with SU0/SU1, and runs four rounds via HASHUPDATE.
  40  	VADD	V16.S4, V4.S4, V9.S4                        // V9(W0+K0...W3+K3)
  41  	SHA256SU0	V5.S4, V4.S4                        // V4: (su0(W1)+W0,...,su0(W4)+W3)
  42  	HASHUPDATE                                          // H4: rounds 0-3
  43  
  44  	VADD	V17.S4, V5.S4, V9.S4                        // V9(W4+K4...W7+K7)
  45  	SHA256SU0	V6.S4, V5.S4                        // V5: (su0(W5)+W4,...,su0(W8)+W7)
  46  	SHA256SU1	V7.S4, V6.S4, V4.S4                 // V4: W16-W19
  47  	HASHUPDATE                                          // H8: rounds 4-7
  48  
  49  	VADD	V18.S4, V6.S4, V9.S4                        // V9(W8+K8...W11+K11)
  50  	SHA256SU0	V7.S4, V6.S4                        // V6: (su0(W9)+W8,...,su0(W12)+W11)
  51  	SHA256SU1	V4.S4, V7.S4, V5.S4                 // V5: W20-W23
  52  	HASHUPDATE                                          // H12: rounds 8-11
  53  
  54  	VADD	V19.S4, V7.S4, V9.S4                        // V9(W12+K12...W15+K15)
  55  	SHA256SU0	V4.S4, V7.S4                        // V7: (su0(W13)+W12,...,su0(W16)+W15)
  56  	SHA256SU1	V5.S4, V4.S4, V6.S4                 // V6: W24-W27
  57  	HASHUPDATE                                          // H16: rounds 12-15
  58  
  59  	VADD	V20.S4, V4.S4, V9.S4                        // V9(W16+K16...W19+K19)
  60  	SHA256SU0	V5.S4, V4.S4                        // V4: (su0(W17)+W16,...,su0(W20)+W19)
  61  	SHA256SU1	V6.S4, V5.S4, V7.S4                 // V7: W28-W31
  62  	HASHUPDATE                                          // H20: rounds 16-19
  63  
  64  	VADD	V21.S4, V5.S4, V9.S4                        // V9(W20+K20...W23+K23)
  65  	SHA256SU0	V6.S4, V5.S4                        // V5: (su0(W21)+W20,...,su0(W24)+W23)
  66  	SHA256SU1	V7.S4, V6.S4, V4.S4                 // V4: W32-W35
  67  	HASHUPDATE                                          // H24: rounds 20-23
  68  
  69  	VADD	V22.S4, V6.S4, V9.S4                        // V9(W24+K24...W27+K27)
  70  	SHA256SU0	V7.S4, V6.S4                        // V6: (su0(W25)+W24,...,su0(W28)+W27)
  71  	SHA256SU1	V4.S4, V7.S4, V5.S4                 // V5: W36-W39
  72  	HASHUPDATE                                          // H28: rounds 24-27
  73  
  74  	VADD	V23.S4, V7.S4, V9.S4                        // V9(W28+K28...W31+K31)
  75  	SHA256SU0	V4.S4, V7.S4                        // V7: (su0(W29)+W28,...,su0(W32)+W31)
  76  	SHA256SU1	V5.S4, V4.S4, V6.S4                 // V6: W40-W43
  77  	HASHUPDATE                                          // H32: rounds 28-31
  78  
  79  	VADD	V24.S4, V4.S4, V9.S4                        // V9(W32+K32...W35+K35)
  80  	SHA256SU0	V5.S4, V4.S4                        // V4: (su0(W33)+W32,...,su0(W36)+W35)
  81  	SHA256SU1	V6.S4, V5.S4, V7.S4                 // V7: W44-W47
  82  	HASHUPDATE                                          // H36: rounds 32-35
  83  
  84  	VADD	V25.S4, V5.S4, V9.S4                        // V9(W36+K36...W39+K39)
  85  	SHA256SU0	V6.S4, V5.S4                        // V5: (su0(W37)+W36,...,su0(W40)+W39)
  86  	SHA256SU1	V7.S4, V6.S4, V4.S4                 // V4: W48-W51
  87  	HASHUPDATE                                          // H40: rounds 36-39
  88  
  89  	VADD	V26.S4, V6.S4, V9.S4                        // V9(W40+K40...W43+K43)
  90  	SHA256SU0	V7.S4, V6.S4                        // V6: (su0(W41)+W40,...,su0(W44)+W43)
  91  	SHA256SU1	V4.S4, V7.S4, V5.S4                 // V5: W52-W55
  92  	HASHUPDATE                                          // H44: rounds 40-43
  93  
  94  	VADD	V27.S4, V7.S4, V9.S4                        // V9(W44+K44...W47+K47)
  95  	SHA256SU0	V4.S4, V7.S4                        // V7: (su0(W45)+W44,...,su0(W48)+W47)
  96  	SHA256SU1	V5.S4, V4.S4, V6.S4                 // V6: W56-W59
  97  	HASHUPDATE                                          // H48: rounds 44-47
  98  // Rounds 48-63: no further SU0 is needed; one last SU1 completes W60-W63, then only VADD + HASHUPDATE remain.
  99  	VADD	V28.S4, V4.S4, V9.S4                        // V9(W48+K48,...,W51+K51)
 100  	HASHUPDATE                                          // H52: rounds 48-51
 101  	SHA256SU1	V6.S4, V5.S4, V7.S4                 // V7: W60-W63 (touches only V5-V7, so independent of the HASHUPDATE above)
 102  
 103  	VADD	V29.S4, V5.S4, V9.S4                        // V9(W52+K52,...,W55+K55)
 104  	HASHUPDATE                                          // H56: rounds 52-55
 105  
 106  	VADD	V30.S4, V6.S4, V9.S4                        // V9(W56+K56,...,W59+K59)
 107  	HASHUPDATE                                          // H60: rounds 56-59
 108  
 109  	VADD	V31.S4, V7.S4, V9.S4                        // V9(W60+K60,...,W63+K63)
 110  	HASHUPDATE                                          // H64: rounds 60-63
 111  // NOTE(review): a block is hashed before R3 is tested and CBNZ exits only at exactly zero, so this relies on len(p) being a non-zero multiple of 64 - confirm callers guarantee it.
 112  	SUB	$64, R3, R3                                 // R3 = bytes left after this 64-byte block
 113  	VADD	V2.S4, V0.S4, V0.S4                         // V0 += V2: fold the working state into the hash
 114  	VADD	V3.S4, V1.S4, V1.S4                         // V1 += V3: fold the working state into the hash
 115  	CBNZ	R3, blockloop                               // bytes remain: hash the next block
 116  // Fall through: no branch above targets sha256ret.
 117  sha256ret:
 118  
 119  	VST1	[V0.S4, V1.S4], (R0)                       // store the updated hash value H back to dig
 120  	RET
 121  
 122