//+build !noasm,!appengine,gc

// SHA intrinsic version of SHA256

// Kristofer Peterson, (C) 2018.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

  20  #include "textflag.h"
  21  
  22  DATA K<>+0x00(SB)/4, $0x428a2f98
  23  DATA K<>+0x04(SB)/4, $0x71374491
  24  DATA K<>+0x08(SB)/4, $0xb5c0fbcf
  25  DATA K<>+0x0c(SB)/4, $0xe9b5dba5
  26  DATA K<>+0x10(SB)/4, $0x3956c25b
  27  DATA K<>+0x14(SB)/4, $0x59f111f1
  28  DATA K<>+0x18(SB)/4, $0x923f82a4
  29  DATA K<>+0x1c(SB)/4, $0xab1c5ed5
  30  DATA K<>+0x20(SB)/4, $0xd807aa98
  31  DATA K<>+0x24(SB)/4, $0x12835b01
  32  DATA K<>+0x28(SB)/4, $0x243185be
  33  DATA K<>+0x2c(SB)/4, $0x550c7dc3
  34  DATA K<>+0x30(SB)/4, $0x72be5d74
  35  DATA K<>+0x34(SB)/4, $0x80deb1fe
  36  DATA K<>+0x38(SB)/4, $0x9bdc06a7
  37  DATA K<>+0x3c(SB)/4, $0xc19bf174
  38  DATA K<>+0x40(SB)/4, $0xe49b69c1
  39  DATA K<>+0x44(SB)/4, $0xefbe4786
  40  DATA K<>+0x48(SB)/4, $0x0fc19dc6
  41  DATA K<>+0x4c(SB)/4, $0x240ca1cc
  42  DATA K<>+0x50(SB)/4, $0x2de92c6f
  43  DATA K<>+0x54(SB)/4, $0x4a7484aa
  44  DATA K<>+0x58(SB)/4, $0x5cb0a9dc
  45  DATA K<>+0x5c(SB)/4, $0x76f988da
  46  DATA K<>+0x60(SB)/4, $0x983e5152
  47  DATA K<>+0x64(SB)/4, $0xa831c66d
  48  DATA K<>+0x68(SB)/4, $0xb00327c8
  49  DATA K<>+0x6c(SB)/4, $0xbf597fc7
  50  DATA K<>+0x70(SB)/4, $0xc6e00bf3
  51  DATA K<>+0x74(SB)/4, $0xd5a79147
  52  DATA K<>+0x78(SB)/4, $0x06ca6351
  53  DATA K<>+0x7c(SB)/4, $0x14292967
  54  DATA K<>+0x80(SB)/4, $0x27b70a85
  55  DATA K<>+0x84(SB)/4, $0x2e1b2138
  56  DATA K<>+0x88(SB)/4, $0x4d2c6dfc
  57  DATA K<>+0x8c(SB)/4, $0x53380d13
  58  DATA K<>+0x90(SB)/4, $0x650a7354
  59  DATA K<>+0x94(SB)/4, $0x766a0abb
  60  DATA K<>+0x98(SB)/4, $0x81c2c92e
  61  DATA K<>+0x9c(SB)/4, $0x92722c85
  62  DATA K<>+0xa0(SB)/4, $0xa2bfe8a1
  63  DATA K<>+0xa4(SB)/4, $0xa81a664b
  64  DATA K<>+0xa8(SB)/4, $0xc24b8b70
  65  DATA K<>+0xac(SB)/4, $0xc76c51a3
  66  DATA K<>+0xb0(SB)/4, $0xd192e819
  67  DATA K<>+0xb4(SB)/4, $0xd6990624
  68  DATA K<>+0xb8(SB)/4, $0xf40e3585
  69  DATA K<>+0xbc(SB)/4, $0x106aa070
  70  DATA K<>+0xc0(SB)/4, $0x19a4c116
  71  DATA K<>+0xc4(SB)/4, $0x1e376c08
  72  DATA K<>+0xc8(SB)/4, $0x2748774c
  73  DATA K<>+0xcc(SB)/4, $0x34b0bcb5
  74  DATA K<>+0xd0(SB)/4, $0x391c0cb3
  75  DATA K<>+0xd4(SB)/4, $0x4ed8aa4a
  76  DATA K<>+0xd8(SB)/4, $0x5b9cca4f
  77  DATA K<>+0xdc(SB)/4, $0x682e6ff3
  78  DATA K<>+0xe0(SB)/4, $0x748f82ee
  79  DATA K<>+0xe4(SB)/4, $0x78a5636f
  80  DATA K<>+0xe8(SB)/4, $0x84c87814
  81  DATA K<>+0xec(SB)/4, $0x8cc70208
  82  DATA K<>+0xf0(SB)/4, $0x90befffa
  83  DATA K<>+0xf4(SB)/4, $0xa4506ceb
  84  DATA K<>+0xf8(SB)/4, $0xbef9a3f7
  85  DATA K<>+0xfc(SB)/4, $0xc67178f2
  86  GLOBL K<>(SB), RODATA|NOPTR, $256
  87  
  88  DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203
  89  DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b
  90  GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16
  91  
  92  // Register Usage
  93  // BX  base address of constant table (constant)
  94  // DX  hash_state (constant)
  95  // SI  hash_data.data
  96  // DI  hash_data.data + hash_data.length - 64 (constant)
  97  // X0  scratch
  98  // X1  scratch
  99  // X2  working hash state // ABEF
 100  // X3  working hash state // CDGH
 101  // X4  first 16 bytes of block
 102  // X5  second 16 bytes of block
 103  // X6  third 16 bytes of block
 104  // X7  fourth 16 bytes of block
 105  // X12 saved hash state // ABEF
 106  // X13 saved hash state // CDGH
 107  // X15 data shuffle mask (constant)
 108  
 109  TEXT ·blockIntelSha(SB), NOSPLIT, $0-32
 110  	MOVQ      h+0(FP), DX
 111  	MOVQ      message_base+8(FP), SI
 112  	MOVQ      message_len+16(FP), DI
 113  	LEAQ      -64(SI)(DI*1), DI
 114  	MOVOU     (DX), X2
 115  	MOVOU     16(DX), X1
 116  	MOVO      X2, X3
 117  	PUNPCKLLQ X1, X2
 118  	PUNPCKHLQ X1, X3
 119  	PSHUFD    $0x27, X2, X2
 120  	PSHUFD    $0x27, X3, X3
 121  	MOVO      SHUF_MASK<>(SB), X15
 122  	LEAQ      K<>(SB), BX
 123  
 124  	JMP TEST
 125  
 126  LOOP:
 127  	MOVO X2, X12
 128  	MOVO X3, X13
 129  
 130  	// load block and shuffle
 131  	MOVOU  (SI), X4
 132  	MOVOU  16(SI), X5
 133  	MOVOU  32(SI), X6
 134  	MOVOU  48(SI), X7
 135  	PSHUFB X15, X4
 136  	PSHUFB X15, X5
 137  	PSHUFB X15, X6
 138  	PSHUFB X15, X7
 139  
 140  #define ROUND456 \
 141  	PADDL  X5, X0                    \
 142  	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
 143  	MOVO   X5, X1                    \
 144  	LONG   $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4
 145  	PADDL  X1, X6                    \
 146  	LONG   $0xf5cd380f               \ // SHA256MSG2 XMM6, XMM5
 147  	PSHUFD $0x4e, X0, X0             \
 148  	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
 149  	LONG   $0xe5cc380f               // SHA256MSG1 XMM4, XMM5
 150  
 151  #define ROUND567 \
 152  	PADDL  X6, X0                    \
 153  	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
 154  	MOVO   X6, X1                    \
 155  	LONG   $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4
 156  	PADDL  X1, X7                    \
 157  	LONG   $0xfecd380f               \ // SHA256MSG2 XMM7, XMM6
 158  	PSHUFD $0x4e, X0, X0             \
 159  	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
 160  	LONG   $0xeecc380f               // SHA256MSG1 XMM5, XMM6
 161  
 162  #define ROUND674 \
 163  	PADDL  X7, X0                    \
 164  	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
 165  	MOVO   X7, X1                    \
 166  	LONG   $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4
 167  	PADDL  X1, X4                    \
 168  	LONG   $0xe7cd380f               \ // SHA256MSG2 XMM4, XMM7
 169  	PSHUFD $0x4e, X0, X0             \
 170  	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
 171  	LONG   $0xf7cc380f               // SHA256MSG1 XMM6, XMM7
 172  
 173  #define ROUND745 \
 174  	PADDL  X4, X0                    \
 175  	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
 176  	MOVO   X4, X1                    \
 177  	LONG   $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4
 178  	PADDL  X1, X5                    \
 179  	LONG   $0xeccd380f               \ // SHA256MSG2 XMM5, XMM4
 180  	PSHUFD $0x4e, X0, X0             \
 181  	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
 182  	LONG   $0xfccc380f               // SHA256MSG1 XMM7, XMM4
 183  
 184  	// rounds 0-3
 185  	MOVO   (BX), X0
 186  	PADDL  X4, X0
 187  	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
 188  	PSHUFD $0x4e, X0, X0
 189  	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
 190  
 191  	// rounds 4-7
 192  	MOVO   1*16(BX), X0
 193  	PADDL  X5, X0
 194  	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
 195  	PSHUFD $0x4e, X0, X0
 196  	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
 197  	LONG   $0xe5cc380f   // SHA256MSG1 XMM4, XMM5
 198  
 199  	// rounds 8-11
 200  	MOVO   2*16(BX), X0
 201  	PADDL  X6, X0
 202  	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
 203  	PSHUFD $0x4e, X0, X0
 204  	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
 205  	LONG   $0xeecc380f   // SHA256MSG1 XMM5, XMM6
 206  
 207  	MOVO 3*16(BX), X0; ROUND674  // rounds 12-15
 208  	MOVO 4*16(BX), X0; ROUND745  // rounds 16-19
 209  	MOVO 5*16(BX), X0; ROUND456  // rounds 20-23
 210  	MOVO 6*16(BX), X0; ROUND567  // rounds 24-27
 211  	MOVO 7*16(BX), X0; ROUND674  // rounds 28-31
 212  	MOVO 8*16(BX), X0; ROUND745  // rounds 32-35
 213  	MOVO 9*16(BX), X0; ROUND456  // rounds 36-39
 214  	MOVO 10*16(BX), X0; ROUND567 // rounds 40-43
 215  	MOVO 11*16(BX), X0; ROUND674 // rounds 44-47
 216  	MOVO 12*16(BX), X0; ROUND745 // rounds 48-51
 217  
 218  	// rounds 52-55
 219  	MOVO   13*16(BX), X0
 220  	PADDL  X5, X0
 221  	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
 222  	MOVO   X5, X1
 223  	LONG   $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4
 224  	PADDL  X1, X6
 225  	LONG   $0xf5cd380f               // SHA256MSG2 XMM6, XMM5
 226  	PSHUFD $0x4e, X0, X0
 227  	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3
 228  
 229  	// rounds 56-59
 230  	MOVO   14*16(BX), X0
 231  	PADDL  X6, X0
 232  	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
 233  	MOVO   X6, X1
 234  	LONG   $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4
 235  	PADDL  X1, X7
 236  	LONG   $0xfecd380f               // SHA256MSG2 XMM7, XMM6
 237  	PSHUFD $0x4e, X0, X0
 238  	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3
 239  
 240  	// rounds 60-63
 241  	MOVO   15*16(BX), X0
 242  	PADDL  X7, X0
 243  	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
 244  	PSHUFD $0x4e, X0, X0
 245  	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
 246  
 247  	PADDL X12, X2
 248  	PADDL X13, X3
 249  
 250  	ADDQ $64, SI
 251  
 252  TEST:
 253  	CMPQ SI, DI
 254  	JBE  LOOP
 255  
 256  	PSHUFD $0x4e, X3, X0
 257  	LONG   $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0
 258  	PSHUFD $0x4e, X2, X1
 259  	LONG   $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f
 260  	PSHUFD $0x1b, X0, X0
 261  	PSHUFD $0x1b, X1, X1
 262  
 263  	MOVOU X0, (DX)
 264  	MOVOU X1, 16(DX)
 265  
 266  	RET
 267