sha512block_amd64.s raw

   1  // Code generated by command: go run sha512block_amd64_asm.go -out ../sha512block_amd64.s. DO NOT EDIT.
   2  
   3  //go:build !purego
   4  
   5  #include "textflag.h"
   6  
   7  // func blockAVX2(dig *Digest, p []byte)
   8  // Requires: AVX, AVX2, BMI2
      //
      // Consumes p in whole 128-byte blocks and folds each block into the eight
      // 64-bit words stored at offsets 0..56 of *dig. Bytes past the last whole
      // block are not read.
      //
      // Register use across the function:
      //   SI                          dig pointer
      //   DI                          input cursor (reused as scratch inside the rounds)
      //   BP                          cursor into the ·_K table
      //   AX BX CX R8 DX R9 R10 R11   the eight working words, in dig order
      //   R12-R15                     round temporaries
      //   Y4-Y7                       sixteen-word message window
      //   Y9                          byte-flip shuffle mask
      // Frame ($56): 0-31(SP) staged W+K values, 32(SP) loop counter,
      // 40(SP) saved input pointer, 48(SP) end-of-input pointer.
   9  TEXT ·blockAVX2(SB), NOSPLIT, $56-32
  10  	MOVQ    dig+0(FP), SI
  11  	MOVQ    p_base+8(FP), DI
  12  	MOVQ    p_len+16(FP), DX
      	// DX = len(p) rounded down to a multiple of 128.
  13  	SHRQ    $0x07, DX
  14  	SHLQ    $0x07, DX
      	// ZF comes from the SHLQ result: leave when there is no whole block.
  15  	JZ      done_hash
      	// 48(SP) = address one past the last whole block.
  16  	ADDQ    DI, DX
  17  	MOVQ    DX, 48(SP)
      	// Load the eight working words from *dig. DX is reused here because the
      	// end pointer now lives in the frame.
  18  	MOVQ    (SI), AX
  19  	MOVQ    8(SI), BX
  20  	MOVQ    16(SI), CX
  21  	MOVQ    24(SI), R8
  22  	MOVQ    32(SI), DX
  23  	MOVQ    40(SI), R9
  24  	MOVQ    48(SI), R10
  25  	MOVQ    56(SI), R11
      	// Y9 = VPSHUFB control that byte-reverses each 64-bit word.
  26  	VMOVDQU PSHUFFLE_BYTE_FLIP_MASK<>+0(SB), Y9
  27  
  28  loop0:
      	// Outer loop: one pass per 128-byte block at (DI).
      	// BP restarts at the beginning of the ·_K table (declared outside this
      	// file; the rounds read 80 consecutive 64-bit entries from it).
  29  	MOVQ    $·_K+0(SB), BP
      	// Y4..Y7 = the block's sixteen 64-bit words, each byte-reversed via Y9.
  30  	VMOVDQU (DI), Y4
  31  	VPSHUFB Y9, Y4, Y4
  32  	VMOVDQU 32(DI), Y5
  33  	VPSHUFB Y9, Y5, Y5
  34  	VMOVDQU 64(DI), Y6
  35  	VPSHUFB Y9, Y6, Y6
  36  	VMOVDQU 96(DI), Y7
  37  	VPSHUFB Y9, Y7, Y7
      	// DI is used as scratch by the rounds, so park the block pointer.
  38  	MOVQ    DI, 40(SP)
      	// loop1 pass counter: 4 passes.
  39  	MOVQ    $0x00000004, 32(SP)
  40  
  41  loop1:
      	// Rounds 0-63: sixteen rounds per pass, 4 passes (counter at 32(SP)).
      	// Each pass has four groups. A group
      	//   1. stages four W+K sums at 0..24(SP) (message words plus the ·_K
      	//      entries at the BP cursor),
      	//   2. runs four scalar rounds that consume them, and
      	//   3. interleaved with those rounds, computes the next four message
      	//      words in YMM registers, overwriting the oldest of Y4..Y7.
      	//
      	// Scalar round, with a..h naming the working words in their current
      	// registers (the assignment rotates by one each round and repeats
      	// every eight rounds):
      	//   R13 = ror(e,41) ^ ror(e,18) ^ ror(e,14)
      	//   R15 = ((f ^ g) & e) ^ g
      	//   R14 = ror(a,34) ^ ror(a,39) ^ ror(a,28)
      	//   DI  = ((a | c) & b) | (a & c)
      	//   t   = h + W+K + R13 + R15;  d += t;  h = t + R14 + DI
      	// The register holding h becomes the next round's a, and d the next e.
      	//
      	// Group 1: consumes Y4 with the table entries at 0(BP); replaces Y4.
  42  	VPADDQ     (BP), Y4, Y0
  43  	VMOVDQU    Y0, (SP)
      	// Y0 = {Y6[1],Y6[2],Y6[3],Y7[0]} + Y4.
  44  	VPERM2F128 $0x03, Y6, Y7, Y0
  45  	VPALIGNR   $0x08, Y6, Y0, Y0
  46  	VPADDQ     Y4, Y0, Y0
      	// Y1 = {Y4[1],Y4[2],Y4[3],Y5[0]}; Y3 = ror(Y1,1); Y8 = Y1 >> 7.
  47  	VPERM2F128 $0x03, Y4, Y5, Y1
  48  	VPALIGNR   $0x08, Y4, Y1, Y1
  49  	VPSRLQ     $0x01, Y1, Y2
  50  	VPSLLQ     $0x3f, Y1, Y3
  51  	VPOR       Y2, Y3, Y3
  52  	VPSRLQ     $0x07, Y1, Y8
      	// Round (W+K at 0(SP)): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
  53  	MOVQ       AX, DI
  54  	RORXQ      $0x29, DX, R13
  55  	RORXQ      $0x12, DX, R14
  56  	ADDQ       (SP), R11
  57  	ORQ        CX, DI
  58  	MOVQ       R9, R15
  59  	RORXQ      $0x22, AX, R12
  60  	XORQ       R14, R13
  61  	XORQ       R10, R15
  62  	RORXQ      $0x0e, DX, R14
  63  	ANDQ       DX, R15
  64  	XORQ       R14, R13
  65  	RORXQ      $0x27, AX, R14
  66  	ADDQ       R11, R8
  67  	ANDQ       BX, DI
  68  	XORQ       R12, R14
  69  	RORXQ      $0x1c, AX, R12
  70  	XORQ       R10, R15
  71  	XORQ       R12, R14
  72  	MOVQ       AX, R12
  73  	ANDQ       CX, R12
  74  	ADDQ       R13, R15
  75  	ORQ        R12, DI
  76  	ADDQ       R14, R11
  77  	ADDQ       R15, R8
  78  	ADDQ       R15, R11
  79  	ADDQ       DI, R11
      	// Y1 = ror(Y1,8) ^ ror(Y1,1) ^ (Y1 >> 7); Y0 += Y1.
  80  	VPSRLQ     $0x08, Y1, Y2
  81  	VPSLLQ     $0x38, Y1, Y1
  82  	VPOR       Y2, Y1, Y1
  83  	VPXOR      Y8, Y3, Y3
  84  	VPXOR      Y1, Y3, Y1
  85  	VPADDQ     Y1, Y0, Y0
      	// Y4 = low lane of Y0 copied to both lanes; Y0 keeps only its high lane.
  86  	VPERM2F128 $0x00, Y0, Y0, Y4
  87  	VPAND      MASK_YMM_LO<>+0(SB), Y0, Y0
      	// Y2 = high lane of Y7 (the two newest words) in both lanes; Y8 = Y2 >> 6.
  88  	VPERM2F128 $0x11, Y7, Y7, Y2
  89  	VPSRLQ     $0x06, Y2, Y8
      	// Round (W+K at 8(SP)): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
  90  	MOVQ       R11, DI
  91  	RORXQ      $0x29, R8, R13
  92  	RORXQ      $0x12, R8, R14
  93  	ADDQ       8(SP), R10
  94  	ORQ        BX, DI
  95  	MOVQ       DX, R15
  96  	RORXQ      $0x22, R11, R12
  97  	XORQ       R14, R13
  98  	XORQ       R9, R15
  99  	RORXQ      $0x0e, R8, R14
 100  	XORQ       R14, R13
 101  	RORXQ      $0x27, R11, R14
 102  	ANDQ       R8, R15
 103  	ADDQ       R10, CX
 104  	ANDQ       AX, DI
 105  	XORQ       R12, R14
 106  	RORXQ      $0x1c, R11, R12
 107  	XORQ       R9, R15
 108  	XORQ       R12, R14
 109  	MOVQ       R11, R12
 110  	ANDQ       BX, R12
 111  	ADDQ       R13, R15
 112  	ORQ        R12, DI
 113  	ADDQ       R14, R10
 114  	ADDQ       R15, CX
 115  	ADDQ       R15, R10
 116  	ADDQ       DI, R10
      	// Y8 = (Y2 >> 6) ^ ror(Y2,19) ^ ror(Y2,61); Y4 += Y8 gives the first two
      	// new words (present in both lanes). Then Y8 = Y4 >> 6 for the next step.
 117  	VPSRLQ     $0x13, Y2, Y3
 118  	VPSLLQ     $0x2d, Y2, Y1
 119  	VPOR       Y1, Y3, Y3
 120  	VPXOR      Y3, Y8, Y8
 121  	VPSRLQ     $0x3d, Y2, Y3
 122  	VPSLLQ     $0x03, Y2, Y1
 123  	VPOR       Y1, Y3, Y3
 124  	VPXOR      Y3, Y8, Y8
 125  	VPADDQ     Y8, Y4, Y4
 126  	VPSRLQ     $0x06, Y4, Y8
      	// Round (W+K at 16(SP)): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
 127  	MOVQ       R10, DI
 128  	RORXQ      $0x29, CX, R13
 129  	ADDQ       16(SP), R9
 130  	RORXQ      $0x12, CX, R14
 131  	ORQ        AX, DI
 132  	MOVQ       R8, R15
 133  	XORQ       DX, R15
 134  	RORXQ      $0x22, R10, R12
 135  	XORQ       R14, R13
 136  	ANDQ       CX, R15
 137  	RORXQ      $0x0e, CX, R14
 138  	ADDQ       R9, BX
 139  	ANDQ       R11, DI
 140  	XORQ       R14, R13
 141  	RORXQ      $0x27, R10, R14
 142  	XORQ       DX, R15
 143  	XORQ       R12, R14
 144  	RORXQ      $0x1c, R10, R12
 145  	XORQ       R12, R14
 146  	MOVQ       R10, R12
 147  	ANDQ       AX, R12
 148  	ADDQ       R13, R15
 149  	ORQ        R12, DI
 150  	ADDQ       R14, R9
 151  	ADDQ       R15, BX
 152  	ADDQ       R15, R9
 153  	ADDQ       DI, R9
      	// Same shift/rotate mix applied to the two words just produced; added to
      	// the high lane kept in Y0, then blended into the high lane of Y4.
      	// Y4 now holds four new message words.
 154  	VPSRLQ     $0x13, Y4, Y3
 155  	VPSLLQ     $0x2d, Y4, Y1
 156  	VPOR       Y1, Y3, Y3
 157  	VPXOR      Y3, Y8, Y8
 158  	VPSRLQ     $0x3d, Y4, Y3
 159  	VPSLLQ     $0x03, Y4, Y1
 160  	VPOR       Y1, Y3, Y3
 161  	VPXOR      Y3, Y8, Y8
 162  	VPADDQ     Y8, Y0, Y2
 163  	VPBLENDD   $0xf0, Y2, Y4, Y4
      	// Round (W+K at 24(SP)): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
 164  	MOVQ       R9, DI
 165  	RORXQ      $0x29, BX, R13
 166  	RORXQ      $0x12, BX, R14
 167  	ADDQ       24(SP), DX
 168  	ORQ        R11, DI
 169  	MOVQ       CX, R15
 170  	RORXQ      $0x22, R9, R12
 171  	XORQ       R14, R13
 172  	XORQ       R8, R15
 173  	RORXQ      $0x0e, BX, R14
 174  	ANDQ       BX, R15
 175  	ADDQ       DX, AX
 176  	ANDQ       R10, DI
 177  	XORQ       R14, R13
 178  	XORQ       R8, R15
 179  	RORXQ      $0x27, R9, R14
 180  	ADDQ       R13, R15
 181  	XORQ       R12, R14
 182  	ADDQ       R15, AX
 183  	RORXQ      $0x1c, R9, R12
 184  	XORQ       R12, R14
 185  	MOVQ       R9, R12
 186  	ANDQ       R11, R12
 187  	ORQ        R12, DI
 188  	ADDQ       R14, DX
 189  	ADDQ       R15, DX
 190  	ADDQ       DI, DX
      	// Group 2: same pattern with the window rotated by one register.
      	// Consumes Y5 with the table entries at 32(BP); replaces Y5.
 191  	VPADDQ     32(BP), Y5, Y0
 192  	VMOVDQU    Y0, (SP)
 193  	VPERM2F128 $0x03, Y7, Y4, Y0
 194  	VPALIGNR   $0x08, Y7, Y0, Y0
 195  	VPADDQ     Y5, Y0, Y0
 196  	VPERM2F128 $0x03, Y5, Y6, Y1
 197  	VPALIGNR   $0x08, Y5, Y1, Y1
 198  	VPSRLQ     $0x01, Y1, Y2
 199  	VPSLLQ     $0x3f, Y1, Y3
 200  	VPOR       Y2, Y3, Y3
 201  	VPSRLQ     $0x07, Y1, Y8
      	// Round (W+K at 0(SP)): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
 202  	MOVQ       DX, DI
 203  	RORXQ      $0x29, AX, R13
 204  	RORXQ      $0x12, AX, R14
 205  	ADDQ       (SP), R8
 206  	ORQ        R10, DI
 207  	MOVQ       BX, R15
 208  	RORXQ      $0x22, DX, R12
 209  	XORQ       R14, R13
 210  	XORQ       CX, R15
 211  	RORXQ      $0x0e, AX, R14
 212  	ANDQ       AX, R15
 213  	XORQ       R14, R13
 214  	RORXQ      $0x27, DX, R14
 215  	ADDQ       R8, R11
 216  	ANDQ       R9, DI
 217  	XORQ       R12, R14
 218  	RORXQ      $0x1c, DX, R12
 219  	XORQ       CX, R15
 220  	XORQ       R12, R14
 221  	MOVQ       DX, R12
 222  	ANDQ       R10, R12
 223  	ADDQ       R13, R15
 224  	ORQ        R12, DI
 225  	ADDQ       R14, R8
 226  	ADDQ       R15, R11
 227  	ADDQ       R15, R8
 228  	ADDQ       DI, R8
 229  	VPSRLQ     $0x08, Y1, Y2
 230  	VPSLLQ     $0x38, Y1, Y1
 231  	VPOR       Y2, Y1, Y1
 232  	VPXOR      Y8, Y3, Y3
 233  	VPXOR      Y1, Y3, Y1
 234  	VPADDQ     Y1, Y0, Y0
 235  	VPERM2F128 $0x00, Y0, Y0, Y5
 236  	VPAND      MASK_YMM_LO<>+0(SB), Y0, Y0
 237  	VPERM2F128 $0x11, Y4, Y4, Y2
 238  	VPSRLQ     $0x06, Y2, Y8
      	// Round (W+K at 8(SP)): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
 239  	MOVQ       R8, DI
 240  	RORXQ      $0x29, R11, R13
 241  	RORXQ      $0x12, R11, R14
 242  	ADDQ       8(SP), CX
 243  	ORQ        R9, DI
 244  	MOVQ       AX, R15
 245  	RORXQ      $0x22, R8, R12
 246  	XORQ       R14, R13
 247  	XORQ       BX, R15
 248  	RORXQ      $0x0e, R11, R14
 249  	XORQ       R14, R13
 250  	RORXQ      $0x27, R8, R14
 251  	ANDQ       R11, R15
 252  	ADDQ       CX, R10
 253  	ANDQ       DX, DI
 254  	XORQ       R12, R14
 255  	RORXQ      $0x1c, R8, R12
 256  	XORQ       BX, R15
 257  	XORQ       R12, R14
 258  	MOVQ       R8, R12
 259  	ANDQ       R9, R12
 260  	ADDQ       R13, R15
 261  	ORQ        R12, DI
 262  	ADDQ       R14, CX
 263  	ADDQ       R15, R10
 264  	ADDQ       R15, CX
 265  	ADDQ       DI, CX
 266  	VPSRLQ     $0x13, Y2, Y3
 267  	VPSLLQ     $0x2d, Y2, Y1
 268  	VPOR       Y1, Y3, Y3
 269  	VPXOR      Y3, Y8, Y8
 270  	VPSRLQ     $0x3d, Y2, Y3
 271  	VPSLLQ     $0x03, Y2, Y1
 272  	VPOR       Y1, Y3, Y3
 273  	VPXOR      Y3, Y8, Y8
 274  	VPADDQ     Y8, Y5, Y5
 275  	VPSRLQ     $0x06, Y5, Y8
      	// Round (W+K at 16(SP)): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
 276  	MOVQ       CX, DI
 277  	RORXQ      $0x29, R10, R13
 278  	ADDQ       16(SP), BX
 279  	RORXQ      $0x12, R10, R14
 280  	ORQ        DX, DI
 281  	MOVQ       R11, R15
 282  	XORQ       AX, R15
 283  	RORXQ      $0x22, CX, R12
 284  	XORQ       R14, R13
 285  	ANDQ       R10, R15
 286  	RORXQ      $0x0e, R10, R14
 287  	ADDQ       BX, R9
 288  	ANDQ       R8, DI
 289  	XORQ       R14, R13
 290  	RORXQ      $0x27, CX, R14
 291  	XORQ       AX, R15
 292  	XORQ       R12, R14
 293  	RORXQ      $0x1c, CX, R12
 294  	XORQ       R12, R14
 295  	MOVQ       CX, R12
 296  	ANDQ       DX, R12
 297  	ADDQ       R13, R15
 298  	ORQ        R12, DI
 299  	ADDQ       R14, BX
 300  	ADDQ       R15, R9
 301  	ADDQ       R15, BX
 302  	ADDQ       DI, BX
 303  	VPSRLQ     $0x13, Y5, Y3
 304  	VPSLLQ     $0x2d, Y5, Y1
 305  	VPOR       Y1, Y3, Y3
 306  	VPXOR      Y3, Y8, Y8
 307  	VPSRLQ     $0x3d, Y5, Y3
 308  	VPSLLQ     $0x03, Y5, Y1
 309  	VPOR       Y1, Y3, Y3
 310  	VPXOR      Y3, Y8, Y8
 311  	VPADDQ     Y8, Y0, Y2
 312  	VPBLENDD   $0xf0, Y2, Y5, Y5
      	// Round (W+K at 24(SP)): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
 313  	MOVQ       BX, DI
 314  	RORXQ      $0x29, R9, R13
 315  	RORXQ      $0x12, R9, R14
 316  	ADDQ       24(SP), AX
 317  	ORQ        R8, DI
 318  	MOVQ       R10, R15
 319  	RORXQ      $0x22, BX, R12
 320  	XORQ       R14, R13
 321  	XORQ       R11, R15
 322  	RORXQ      $0x0e, R9, R14
 323  	ANDQ       R9, R15
 324  	ADDQ       AX, DX
 325  	ANDQ       CX, DI
 326  	XORQ       R14, R13
 327  	XORQ       R11, R15
 328  	RORXQ      $0x27, BX, R14
 329  	ADDQ       R13, R15
 330  	XORQ       R12, R14
 331  	ADDQ       R15, DX
 332  	RORXQ      $0x1c, BX, R12
 333  	XORQ       R12, R14
 334  	MOVQ       BX, R12
 335  	ANDQ       R8, R12
 336  	ORQ        R12, DI
 337  	ADDQ       R14, AX
 338  	ADDQ       R15, AX
 339  	ADDQ       DI, AX
      	// Group 3: consumes Y6 with the table entries at 64(BP); replaces Y6.
      	// The working words are back in their group-1 registers.
 340  	VPADDQ     64(BP), Y6, Y0
 341  	VMOVDQU    Y0, (SP)
 342  	VPERM2F128 $0x03, Y4, Y5, Y0
 343  	VPALIGNR   $0x08, Y4, Y0, Y0
 344  	VPADDQ     Y6, Y0, Y0
 345  	VPERM2F128 $0x03, Y6, Y7, Y1
 346  	VPALIGNR   $0x08, Y6, Y1, Y1
 347  	VPSRLQ     $0x01, Y1, Y2
 348  	VPSLLQ     $0x3f, Y1, Y3
 349  	VPOR       Y2, Y3, Y3
 350  	VPSRLQ     $0x07, Y1, Y8
      	// Round (W+K at 0(SP)): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
 351  	MOVQ       AX, DI
 352  	RORXQ      $0x29, DX, R13
 353  	RORXQ      $0x12, DX, R14
 354  	ADDQ       (SP), R11
 355  	ORQ        CX, DI
 356  	MOVQ       R9, R15
 357  	RORXQ      $0x22, AX, R12
 358  	XORQ       R14, R13
 359  	XORQ       R10, R15
 360  	RORXQ      $0x0e, DX, R14
 361  	ANDQ       DX, R15
 362  	XORQ       R14, R13
 363  	RORXQ      $0x27, AX, R14
 364  	ADDQ       R11, R8
 365  	ANDQ       BX, DI
 366  	XORQ       R12, R14
 367  	RORXQ      $0x1c, AX, R12
 368  	XORQ       R10, R15
 369  	XORQ       R12, R14
 370  	MOVQ       AX, R12
 371  	ANDQ       CX, R12
 372  	ADDQ       R13, R15
 373  	ORQ        R12, DI
 374  	ADDQ       R14, R11
 375  	ADDQ       R15, R8
 376  	ADDQ       R15, R11
 377  	ADDQ       DI, R11
 378  	VPSRLQ     $0x08, Y1, Y2
 379  	VPSLLQ     $0x38, Y1, Y1
 380  	VPOR       Y2, Y1, Y1
 381  	VPXOR      Y8, Y3, Y3
 382  	VPXOR      Y1, Y3, Y1
 383  	VPADDQ     Y1, Y0, Y0
 384  	VPERM2F128 $0x00, Y0, Y0, Y6
 385  	VPAND      MASK_YMM_LO<>+0(SB), Y0, Y0
 386  	VPERM2F128 $0x11, Y5, Y5, Y2
 387  	VPSRLQ     $0x06, Y2, Y8
      	// Round (W+K at 8(SP)): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
 388  	MOVQ       R11, DI
 389  	RORXQ      $0x29, R8, R13
 390  	RORXQ      $0x12, R8, R14
 391  	ADDQ       8(SP), R10
 392  	ORQ        BX, DI
 393  	MOVQ       DX, R15
 394  	RORXQ      $0x22, R11, R12
 395  	XORQ       R14, R13
 396  	XORQ       R9, R15
 397  	RORXQ      $0x0e, R8, R14
 398  	XORQ       R14, R13
 399  	RORXQ      $0x27, R11, R14
 400  	ANDQ       R8, R15
 401  	ADDQ       R10, CX
 402  	ANDQ       AX, DI
 403  	XORQ       R12, R14
 404  	RORXQ      $0x1c, R11, R12
 405  	XORQ       R9, R15
 406  	XORQ       R12, R14
 407  	MOVQ       R11, R12
 408  	ANDQ       BX, R12
 409  	ADDQ       R13, R15
 410  	ORQ        R12, DI
 411  	ADDQ       R14, R10
 412  	ADDQ       R15, CX
 413  	ADDQ       R15, R10
 414  	ADDQ       DI, R10
 415  	VPSRLQ     $0x13, Y2, Y3
 416  	VPSLLQ     $0x2d, Y2, Y1
 417  	VPOR       Y1, Y3, Y3
 418  	VPXOR      Y3, Y8, Y8
 419  	VPSRLQ     $0x3d, Y2, Y3
 420  	VPSLLQ     $0x03, Y2, Y1
 421  	VPOR       Y1, Y3, Y3
 422  	VPXOR      Y3, Y8, Y8
 423  	VPADDQ     Y8, Y6, Y6
 424  	VPSRLQ     $0x06, Y6, Y8
      	// Round (W+K at 16(SP)): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
 425  	MOVQ       R10, DI
 426  	RORXQ      $0x29, CX, R13
 427  	ADDQ       16(SP), R9
 428  	RORXQ      $0x12, CX, R14
 429  	ORQ        AX, DI
 430  	MOVQ       R8, R15
 431  	XORQ       DX, R15
 432  	RORXQ      $0x22, R10, R12
 433  	XORQ       R14, R13
 434  	ANDQ       CX, R15
 435  	RORXQ      $0x0e, CX, R14
 436  	ADDQ       R9, BX
 437  	ANDQ       R11, DI
 438  	XORQ       R14, R13
 439  	RORXQ      $0x27, R10, R14
 440  	XORQ       DX, R15
 441  	XORQ       R12, R14
 442  	RORXQ      $0x1c, R10, R12
 443  	XORQ       R12, R14
 444  	MOVQ       R10, R12
 445  	ANDQ       AX, R12
 446  	ADDQ       R13, R15
 447  	ORQ        R12, DI
 448  	ADDQ       R14, R9
 449  	ADDQ       R15, BX
 450  	ADDQ       R15, R9
 451  	ADDQ       DI, R9
 452  	VPSRLQ     $0x13, Y6, Y3
 453  	VPSLLQ     $0x2d, Y6, Y1
 454  	VPOR       Y1, Y3, Y3
 455  	VPXOR      Y3, Y8, Y8
 456  	VPSRLQ     $0x3d, Y6, Y3
 457  	VPSLLQ     $0x03, Y6, Y1
 458  	VPOR       Y1, Y3, Y3
 459  	VPXOR      Y3, Y8, Y8
 460  	VPADDQ     Y8, Y0, Y2
 461  	VPBLENDD   $0xf0, Y2, Y6, Y6
      	// Round (W+K at 24(SP)): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
 462  	MOVQ       R9, DI
 463  	RORXQ      $0x29, BX, R13
 464  	RORXQ      $0x12, BX, R14
 465  	ADDQ       24(SP), DX
 466  	ORQ        R11, DI
 467  	MOVQ       CX, R15
 468  	RORXQ      $0x22, R9, R12
 469  	XORQ       R14, R13
 470  	XORQ       R8, R15
 471  	RORXQ      $0x0e, BX, R14
 472  	ANDQ       BX, R15
 473  	ADDQ       DX, AX
 474  	ANDQ       R10, DI
 475  	XORQ       R14, R13
 476  	XORQ       R8, R15
 477  	RORXQ      $0x27, R9, R14
 478  	ADDQ       R13, R15
 479  	XORQ       R12, R14
 480  	ADDQ       R15, AX
 481  	RORXQ      $0x1c, R9, R12
 482  	XORQ       R12, R14
 483  	MOVQ       R9, R12
 484  	ANDQ       R11, R12
 485  	ORQ        R12, DI
 486  	ADDQ       R14, DX
 487  	ADDQ       R15, DX
 488  	ADDQ       DI, DX
      	// Group 4: consumes Y7 with the table entries at 96(BP); replaces Y7.
      	// BP then advances by 128 bytes (sixteen table entries) for the next pass.
 489  	VPADDQ     96(BP), Y7, Y0
 490  	VMOVDQU    Y0, (SP)
 491  	ADDQ       $0x80, BP
 492  	VPERM2F128 $0x03, Y5, Y6, Y0
 493  	VPALIGNR   $0x08, Y5, Y0, Y0
 494  	VPADDQ     Y7, Y0, Y0
 495  	VPERM2F128 $0x03, Y7, Y4, Y1
 496  	VPALIGNR   $0x08, Y7, Y1, Y1
 497  	VPSRLQ     $0x01, Y1, Y2
 498  	VPSLLQ     $0x3f, Y1, Y3
 499  	VPOR       Y2, Y3, Y3
 500  	VPSRLQ     $0x07, Y1, Y8
      	// Round (W+K at 0(SP)): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
 501  	MOVQ       DX, DI
 502  	RORXQ      $0x29, AX, R13
 503  	RORXQ      $0x12, AX, R14
 504  	ADDQ       (SP), R8
 505  	ORQ        R10, DI
 506  	MOVQ       BX, R15
 507  	RORXQ      $0x22, DX, R12
 508  	XORQ       R14, R13
 509  	XORQ       CX, R15
 510  	RORXQ      $0x0e, AX, R14
 511  	ANDQ       AX, R15
 512  	XORQ       R14, R13
 513  	RORXQ      $0x27, DX, R14
 514  	ADDQ       R8, R11
 515  	ANDQ       R9, DI
 516  	XORQ       R12, R14
 517  	RORXQ      $0x1c, DX, R12
 518  	XORQ       CX, R15
 519  	XORQ       R12, R14
 520  	MOVQ       DX, R12
 521  	ANDQ       R10, R12
 522  	ADDQ       R13, R15
 523  	ORQ        R12, DI
 524  	ADDQ       R14, R8
 525  	ADDQ       R15, R11
 526  	ADDQ       R15, R8
 527  	ADDQ       DI, R8
 528  	VPSRLQ     $0x08, Y1, Y2
 529  	VPSLLQ     $0x38, Y1, Y1
 530  	VPOR       Y2, Y1, Y1
 531  	VPXOR      Y8, Y3, Y3
 532  	VPXOR      Y1, Y3, Y1
 533  	VPADDQ     Y1, Y0, Y0
 534  	VPERM2F128 $0x00, Y0, Y0, Y7
 535  	VPAND      MASK_YMM_LO<>+0(SB), Y0, Y0
 536  	VPERM2F128 $0x11, Y6, Y6, Y2
 537  	VPSRLQ     $0x06, Y2, Y8
      	// Round (W+K at 8(SP)): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
 538  	MOVQ       R8, DI
 539  	RORXQ      $0x29, R11, R13
 540  	RORXQ      $0x12, R11, R14
 541  	ADDQ       8(SP), CX
 542  	ORQ        R9, DI
 543  	MOVQ       AX, R15
 544  	RORXQ      $0x22, R8, R12
 545  	XORQ       R14, R13
 546  	XORQ       BX, R15
 547  	RORXQ      $0x0e, R11, R14
 548  	XORQ       R14, R13
 549  	RORXQ      $0x27, R8, R14
 550  	ANDQ       R11, R15
 551  	ADDQ       CX, R10
 552  	ANDQ       DX, DI
 553  	XORQ       R12, R14
 554  	RORXQ      $0x1c, R8, R12
 555  	XORQ       BX, R15
 556  	XORQ       R12, R14
 557  	MOVQ       R8, R12
 558  	ANDQ       R9, R12
 559  	ADDQ       R13, R15
 560  	ORQ        R12, DI
 561  	ADDQ       R14, CX
 562  	ADDQ       R15, R10
 563  	ADDQ       R15, CX
 564  	ADDQ       DI, CX
 565  	VPSRLQ     $0x13, Y2, Y3
 566  	VPSLLQ     $0x2d, Y2, Y1
 567  	VPOR       Y1, Y3, Y3
 568  	VPXOR      Y3, Y8, Y8
 569  	VPSRLQ     $0x3d, Y2, Y3
 570  	VPSLLQ     $0x03, Y2, Y1
 571  	VPOR       Y1, Y3, Y3
 572  	VPXOR      Y3, Y8, Y8
 573  	VPADDQ     Y8, Y7, Y7
 574  	VPSRLQ     $0x06, Y7, Y8
      	// Round (W+K at 16(SP)): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
 575  	MOVQ       CX, DI
 576  	RORXQ      $0x29, R10, R13
 577  	ADDQ       16(SP), BX
 578  	RORXQ      $0x12, R10, R14
 579  	ORQ        DX, DI
 580  	MOVQ       R11, R15
 581  	XORQ       AX, R15
 582  	RORXQ      $0x22, CX, R12
 583  	XORQ       R14, R13
 584  	ANDQ       R10, R15
 585  	RORXQ      $0x0e, R10, R14
 586  	ADDQ       BX, R9
 587  	ANDQ       R8, DI
 588  	XORQ       R14, R13
 589  	RORXQ      $0x27, CX, R14
 590  	XORQ       AX, R15
 591  	XORQ       R12, R14
 592  	RORXQ      $0x1c, CX, R12
 593  	XORQ       R12, R14
 594  	MOVQ       CX, R12
 595  	ANDQ       DX, R12
 596  	ADDQ       R13, R15
 597  	ORQ        R12, DI
 598  	ADDQ       R14, BX
 599  	ADDQ       R15, R9
 600  	ADDQ       R15, BX
 601  	ADDQ       DI, BX
 602  	VPSRLQ     $0x13, Y7, Y3
 603  	VPSLLQ     $0x2d, Y7, Y1
 604  	VPOR       Y1, Y3, Y3
 605  	VPXOR      Y3, Y8, Y8
 606  	VPSRLQ     $0x3d, Y7, Y3
 607  	VPSLLQ     $0x03, Y7, Y1
 608  	VPOR       Y1, Y3, Y3
 609  	VPXOR      Y3, Y8, Y8
 610  	VPADDQ     Y8, Y0, Y2
 611  	VPBLENDD   $0xf0, Y2, Y7, Y7
      	// Round (W+K at 24(SP)): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
 612  	MOVQ       BX, DI
 613  	RORXQ      $0x29, R9, R13
 614  	RORXQ      $0x12, R9, R14
 615  	ADDQ       24(SP), AX
 616  	ORQ        R8, DI
 617  	MOVQ       R10, R15
 618  	RORXQ      $0x22, BX, R12
 619  	XORQ       R14, R13
 620  	XORQ       R11, R15
 621  	RORXQ      $0x0e, R9, R14
 622  	ANDQ       R9, R15
 623  	ADDQ       AX, DX
 624  	ANDQ       CX, DI
 625  	XORQ       R14, R13
 626  	XORQ       R11, R15
 627  	RORXQ      $0x27, BX, R14
 628  	ADDQ       R13, R15
 629  	XORQ       R12, R14
 630  	ADDQ       R15, DX
 631  	RORXQ      $0x1c, BX, R12
 632  	XORQ       R12, R14
 633  	MOVQ       BX, R12
 634  	ANDQ       R8, R12
 635  	ORQ        R12, DI
 636  	ADDQ       R14, AX
 637  	ADDQ       R15, AX
 638  	ADDQ       DI, AX
      	// Next pass until the counter at 32(SP) reaches zero.
 639  	SUBQ       $0x01, 32(SP)
 640  	JNE        loop1
      	// Counter for the two passes of loop2.
 641  	MOVQ       $0x00000002, 32(SP)
 642  
 643  loop2:
      	// Rounds 64-79: eight rounds per pass, 2 passes (counter at 32(SP)).
      	// No new message words are computed here. The first pass consumes Y4 and
      	// Y5; Y6/Y7 are then copied into Y4/Y5 for the second pass.
      	//
      	// Scalar round, with a..h naming the working words in their current
      	// registers (the assignment rotates by one each round):
      	//   R13 = ror(e,41) ^ ror(e,18) ^ ror(e,14)
      	//   R15 = ((f ^ g) & e) ^ g
      	//   R14 = ror(a,34) ^ ror(a,39) ^ ror(a,28)
      	//   DI  = ((a | c) & b) | (a & c)
      	//   t   = h + W+K + R13 + R15;  d += t;  h = t + R14 + DI
      	// The final "+ DI" of a round is issued inside the following round,
      	// except for the fourth round of each group, which adds it immediately.
      	//
      	// Stage Y4 + table entries at 0(BP) for the next four rounds.
 644  	VPADDQ  (BP), Y4, Y0
 645  	VMOVDQU Y0, (SP)
      	// Round (W+K at 0(SP)): a=AX b=BX c=CX d=R8 e=DX f=R9 g=R10 h=R11.
 646  	MOVQ    R9, R15
 647  	RORXQ   $0x29, DX, R13
 648  	RORXQ   $0x12, DX, R14
 649  	XORQ    R10, R15
 650  	XORQ    R14, R13
 651  	RORXQ   $0x0e, DX, R14
 652  	ANDQ    DX, R15
 653  	XORQ    R14, R13
 654  	RORXQ   $0x22, AX, R12
 655  	XORQ    R10, R15
 656  	RORXQ   $0x27, AX, R14
 657  	MOVQ    AX, DI
 658  	XORQ    R12, R14
 659  	RORXQ   $0x1c, AX, R12
 660  	ADDQ    (SP), R11
 661  	ORQ     CX, DI
 662  	XORQ    R12, R14
 663  	MOVQ    AX, R12
 664  	ANDQ    BX, DI
 665  	ANDQ    CX, R12
 666  	ADDQ    R13, R15
 667  	ADDQ    R11, R8
 668  	ORQ     R12, DI
 669  	ADDQ    R14, R11
 670  	ADDQ    R15, R8
 671  	ADDQ    R15, R11
      	// Round (W+K at 8(SP)): a=R11 b=AX c=BX d=CX e=R8 f=DX g=R9 h=R10.
 672  	MOVQ    DX, R15
 673  	RORXQ   $0x29, R8, R13
 674  	RORXQ   $0x12, R8, R14
 675  	XORQ    R9, R15
 676  	XORQ    R14, R13
 677  	RORXQ   $0x0e, R8, R14
 678  	ANDQ    R8, R15
      	// Deferred "+ DI" from the previous round, applied before R11 is read as a.
 679  	ADDQ    DI, R11
 680  	XORQ    R14, R13
 681  	RORXQ   $0x22, R11, R12
 682  	XORQ    R9, R15
 683  	RORXQ   $0x27, R11, R14
 684  	MOVQ    R11, DI
 685  	XORQ    R12, R14
 686  	RORXQ   $0x1c, R11, R12
 687  	ADDQ    8(SP), R10
 688  	ORQ     BX, DI
 689  	XORQ    R12, R14
 690  	MOVQ    R11, R12
 691  	ANDQ    AX, DI
 692  	ANDQ    BX, R12
 693  	ADDQ    R13, R15
 694  	ADDQ    R10, CX
 695  	ORQ     R12, DI
 696  	ADDQ    R14, R10
 697  	ADDQ    R15, CX
 698  	ADDQ    R15, R10
      	// Round (W+K at 16(SP)): a=R10 b=R11 c=AX d=BX e=CX f=R8 g=DX h=R9.
 699  	MOVQ    R8, R15
 700  	RORXQ   $0x29, CX, R13
 701  	RORXQ   $0x12, CX, R14
 702  	XORQ    DX, R15
 703  	XORQ    R14, R13
 704  	RORXQ   $0x0e, CX, R14
 705  	ANDQ    CX, R15
 706  	ADDQ    DI, R10
 707  	XORQ    R14, R13
 708  	RORXQ   $0x22, R10, R12
 709  	XORQ    DX, R15
 710  	RORXQ   $0x27, R10, R14
 711  	MOVQ    R10, DI
 712  	XORQ    R12, R14
 713  	RORXQ   $0x1c, R10, R12
 714  	ADDQ    16(SP), R9
 715  	ORQ     AX, DI
 716  	XORQ    R12, R14
 717  	MOVQ    R10, R12
 718  	ANDQ    R11, DI
 719  	ANDQ    AX, R12
 720  	ADDQ    R13, R15
 721  	ADDQ    R9, BX
 722  	ORQ     R12, DI
 723  	ADDQ    R14, R9
 724  	ADDQ    R15, BX
 725  	ADDQ    R15, R9
      	// Round (W+K at 24(SP)): a=R9 b=R10 c=R11 d=AX e=BX f=CX g=R8 h=DX.
 726  	MOVQ    CX, R15
 727  	RORXQ   $0x29, BX, R13
 728  	RORXQ   $0x12, BX, R14
 729  	XORQ    R8, R15
 730  	XORQ    R14, R13
 731  	RORXQ   $0x0e, BX, R14
 732  	ANDQ    BX, R15
 733  	ADDQ    DI, R9
 734  	XORQ    R14, R13
 735  	RORXQ   $0x22, R9, R12
 736  	XORQ    R8, R15
 737  	RORXQ   $0x27, R9, R14
 738  	MOVQ    R9, DI
 739  	XORQ    R12, R14
 740  	RORXQ   $0x1c, R9, R12
 741  	ADDQ    24(SP), DX
 742  	ORQ     R11, DI
 743  	XORQ    R12, R14
 744  	MOVQ    R9, R12
 745  	ANDQ    R10, DI
 746  	ANDQ    R11, R12
 747  	ADDQ    R13, R15
 748  	ADDQ    DX, AX
 749  	ORQ     R12, DI
 750  	ADDQ    R14, DX
 751  	ADDQ    R15, AX
 752  	ADDQ    R15, DX
 753  	ADDQ    DI, DX
      	// Stage Y5 + table entries at 32(BP); BP then advances by 64 bytes
      	// (eight table entries) for the next pass.
 754  	VPADDQ  32(BP), Y5, Y0
 755  	VMOVDQU Y0, (SP)
 756  	ADDQ    $0x40, BP
      	// Round (W+K at 0(SP)): a=DX b=R9 c=R10 d=R11 e=AX f=BX g=CX h=R8.
 757  	MOVQ    BX, R15
 758  	RORXQ   $0x29, AX, R13
 759  	RORXQ   $0x12, AX, R14
 760  	XORQ    CX, R15
 761  	XORQ    R14, R13
 762  	RORXQ   $0x0e, AX, R14
 763  	ANDQ    AX, R15
 764  	XORQ    R14, R13
 765  	RORXQ   $0x22, DX, R12
 766  	XORQ    CX, R15
 767  	RORXQ   $0x27, DX, R14
 768  	MOVQ    DX, DI
 769  	XORQ    R12, R14
 770  	RORXQ   $0x1c, DX, R12
 771  	ADDQ    (SP), R8
 772  	ORQ     R10, DI
 773  	XORQ    R12, R14
 774  	MOVQ    DX, R12
 775  	ANDQ    R9, DI
 776  	ANDQ    R10, R12
 777  	ADDQ    R13, R15
 778  	ADDQ    R8, R11
 779  	ORQ     R12, DI
 780  	ADDQ    R14, R8
 781  	ADDQ    R15, R11
 782  	ADDQ    R15, R8
      	// Round (W+K at 8(SP)): a=R8 b=DX c=R9 d=R10 e=R11 f=AX g=BX h=CX.
 783  	MOVQ    AX, R15
 784  	RORXQ   $0x29, R11, R13
 785  	RORXQ   $0x12, R11, R14
 786  	XORQ    BX, R15
 787  	XORQ    R14, R13
 788  	RORXQ   $0x0e, R11, R14
 789  	ANDQ    R11, R15
 790  	ADDQ    DI, R8
 791  	XORQ    R14, R13
 792  	RORXQ   $0x22, R8, R12
 793  	XORQ    BX, R15
 794  	RORXQ   $0x27, R8, R14
 795  	MOVQ    R8, DI
 796  	XORQ    R12, R14
 797  	RORXQ   $0x1c, R8, R12
 798  	ADDQ    8(SP), CX
 799  	ORQ     R9, DI
 800  	XORQ    R12, R14
 801  	MOVQ    R8, R12
 802  	ANDQ    DX, DI
 803  	ANDQ    R9, R12
 804  	ADDQ    R13, R15
 805  	ADDQ    CX, R10
 806  	ORQ     R12, DI
 807  	ADDQ    R14, CX
 808  	ADDQ    R15, R10
 809  	ADDQ    R15, CX
      	// Round (W+K at 16(SP)): a=CX b=R8 c=DX d=R9 e=R10 f=R11 g=AX h=BX.
 810  	MOVQ    R11, R15
 811  	RORXQ   $0x29, R10, R13
 812  	RORXQ   $0x12, R10, R14
 813  	XORQ    AX, R15
 814  	XORQ    R14, R13
 815  	RORXQ   $0x0e, R10, R14
 816  	ANDQ    R10, R15
 817  	ADDQ    DI, CX
 818  	XORQ    R14, R13
 819  	RORXQ   $0x22, CX, R12
 820  	XORQ    AX, R15
 821  	RORXQ   $0x27, CX, R14
 822  	MOVQ    CX, DI
 823  	XORQ    R12, R14
 824  	RORXQ   $0x1c, CX, R12
 825  	ADDQ    16(SP), BX
 826  	ORQ     DX, DI
 827  	XORQ    R12, R14
 828  	MOVQ    CX, R12
 829  	ANDQ    R8, DI
 830  	ANDQ    DX, R12
 831  	ADDQ    R13, R15
 832  	ADDQ    BX, R9
 833  	ORQ     R12, DI
 834  	ADDQ    R14, BX
 835  	ADDQ    R15, R9
 836  	ADDQ    R15, BX
      	// Round (W+K at 24(SP)): a=BX b=CX c=R8 d=DX e=R9 f=R10 g=R11 h=AX.
 837  	MOVQ    R10, R15
 838  	RORXQ   $0x29, R9, R13
 839  	RORXQ   $0x12, R9, R14
 840  	XORQ    R11, R15
 841  	XORQ    R14, R13
 842  	RORXQ   $0x0e, R9, R14
 843  	ANDQ    R9, R15
 844  	ADDQ    DI, BX
 845  	XORQ    R14, R13
 846  	RORXQ   $0x22, BX, R12
 847  	XORQ    R11, R15
 848  	RORXQ   $0x27, BX, R14
 849  	MOVQ    BX, DI
 850  	XORQ    R12, R14
 851  	RORXQ   $0x1c, BX, R12
 852  	ADDQ    24(SP), AX
 853  	ORQ     R8, DI
 854  	XORQ    R12, R14
 855  	MOVQ    BX, R12
 856  	ANDQ    CX, DI
 857  	ANDQ    R8, R12
 858  	ADDQ    R13, R15
 859  	ADDQ    AX, DX
 860  	ORQ     R12, DI
 861  	ADDQ    R14, AX
 862  	ADDQ    R15, DX
 863  	ADDQ    R15, AX
 864  	ADDQ    DI, AX
      	// Shift the remaining eight message words down for the second pass.
 865  	VMOVDQU Y6, Y4
 866  	VMOVDQU Y7, Y5
 867  	SUBQ    $0x01, 32(SP)
 868  	JNE     loop2
      	// Add the working words to the previous values in *dig and store the sums;
      	// the registers keep the updated values for the next block.
 869  	ADDQ    (SI), AX
 870  	MOVQ    AX, (SI)
 871  	ADDQ    8(SI), BX
 872  	MOVQ    BX, 8(SI)
 873  	ADDQ    16(SI), CX
 874  	MOVQ    CX, 16(SI)
 875  	ADDQ    24(SI), R8
 876  	MOVQ    R8, 24(SI)
 877  	ADDQ    32(SI), DX
 878  	MOVQ    DX, 32(SI)
 879  	ADDQ    40(SI), R9
 880  	MOVQ    R9, 40(SI)
 881  	ADDQ    48(SI), R10
 882  	MOVQ    R10, 48(SI)
 883  	ADDQ    56(SI), R11
 884  	MOVQ    R11, 56(SI)
      	// Restore the block pointer saved at 40(SP), step to the next 128-byte
      	// block, and repeat until it equals the end pointer at 48(SP).
 885  	MOVQ    40(SP), DI
 886  	ADDQ    $0x80, DI
 887  	CMPQ    DI, 48(SP)
 888  	JNE     loop0
 889  
 890  done_hash:
      	// Common exit, also taken directly when p holds no whole 128-byte block.
      	// VZEROUPPER zeroes the upper halves of the YMM registers before returning.
 891  	VZEROUPPER
 892  	RET
 893  
      // PSHUFFLE_BYTE_FLIP_MASK is the 32-byte VPSHUFB control loaded into Y9 by
      // blockAVX2. Stored little-endian, each quadword lists the byte indices of
      // its own 8-byte group in descending order, so the shuffle reverses the
      // bytes of every 64-bit word.
 894  DATA PSHUFFLE_BYTE_FLIP_MASK<>+0(SB)/8, $0x0001020304050607
 895  DATA PSHUFFLE_BYTE_FLIP_MASK<>+8(SB)/8, $0x08090a0b0c0d0e0f
 896  DATA PSHUFFLE_BYTE_FLIP_MASK<>+16(SB)/8, $0x1011121314151617
 897  DATA PSHUFFLE_BYTE_FLIP_MASK<>+24(SB)/8, $0x18191a1b1c1d1e1f
 898  GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), RODATA|NOPTR, $32
 899  
      // MASK_YMM_LO is a 32-byte VPAND mask: bytes 0-15 are zero and bytes 16-31
      // are all ones, so ANDing a YMM register with it clears the low 128-bit
      // lane and keeps the high lane.
 900  DATA MASK_YMM_LO<>+0(SB)/8, $0x0000000000000000
 901  DATA MASK_YMM_LO<>+8(SB)/8, $0x0000000000000000
 902  DATA MASK_YMM_LO<>+16(SB)/8, $0xffffffffffffffff
 903  DATA MASK_YMM_LO<>+24(SB)/8, $0xffffffffffffffff
 904  GLOBL MASK_YMM_LO<>(SB), RODATA|NOPTR, $32
 905