blake2s_amd64.s raw

   1  // Code generated by command: go run blake2s_amd64_asm.go -out ../blake2s_amd64.s -pkg blake2s. DO NOT EDIT.
   2  
   3  //go:build amd64 && gc && !purego
   4  
   5  #include "textflag.h"
   6  
   7  // func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
   8  // Requires: SSE2
      //
      // Runs the fully unrolled 10-round compression below once per 64-byte chunk
      // of blocks, updating the chaining value *h and the 64-bit byte counter *c
      // in place.
      //
      // The loop is bottom-tested (SUBQ $0x40 / JNE) and the length is never
      // validated here, so the caller must pass a non-zero multiple of 64 bytes.
      //
      // Frame layout (BP = SP rounded up to a 16-byte boundary):
      //   0(BP)  ..  7(BP)   64-bit counter copied from *c
      //   8(BP)  .. 15(BP)   flag, zero-extended to 64 bits
      //  16(BP)  .. 655(BP)  message schedule: 10 rounds x 16 words, stored in
      //                      the order the rounds consume them
      // 656 bytes of scratch plus up to 15 bytes of alignment slack fit in the
      // 672-byte frame.
      //
      // Register roles:
      //   AX = h, BX = c, SI = current block pointer, DX = bytes remaining
      //   X0, X1   chaining value h[0:4], h[4:8]
      //   X2, X3   constants iv0, iv1
      //   X4..X7   working state rows for the current block
      //   X8       scratch for the shift-based rotations
      //   X12      per-block counter increment (counter<>)
      //   X13, X14 rol16<> / rol8<> masks; loaded but not referenced by any
      //            instruction in this routine
      //   X15      {counter (low quadword), flag (high quadword)}
   9  TEXT ·hashBlocksSSE2(SB), $672-48
  10  	MOVQ  h+0(FP), AX
  11  	MOVQ  c+8(FP), BX
  12  	MOVL  flag+16(FP), CX
  13  	MOVQ  blocks_base+24(FP), SI
  14  	MOVQ  blocks_len+32(FP), DX
      	// BP = (SP + 15) &^ 15: 16-byte aligned scratch base, needed because the
      	// MOVO and PADDL-from-memory accesses below require aligned operands.
  15  	MOVQ  SP, BP
  16  	ADDQ  $0x0f, BP
  17  	ANDQ  $-16, BP
      	// Stage {counter, flag} in memory so it can be loaded into X15 as one vector.
  18  	MOVQ  (BX), R9
  19  	MOVQ  R9, (BP)
  20  	MOVQ  CX, 8(BP)
  21  	MOVOU (AX), X0
  22  	MOVOU 16(AX), X1
  23  	MOVOU iv0<>+0(SB), X2
  24  	MOVOU iv1<>+0(SB), X3
  25  	MOVOU counter<>+0(SB), X12
  26  	MOVOU rol16<>+0(SB), X13
  27  	MOVOU rol8<>+0(SB), X14
  28  	MOVO  (BP), X15
  29  
  30  loop:
      	// Initialise the working state: rows 0-1 from h, rows 2-3 from the constants.
  31  	MOVO   X0, X4
  32  	MOVO   X1, X5
  33  	MOVO   X2, X6
  34  	MOVO   X3, X7
      	// Advance the byte counter by 64, then fold counter and flag into row 3.
  35  	PADDQ  X12, X15
  36  	PXOR   X15, X7
      	// Load the 64-byte block as eight quadwords (two 32-bit message words each).
  37  	MOVQ   (SI), R8
  38  	MOVQ   8(SI), R9
  39  	MOVQ   16(SI), R10
  40  	MOVQ   24(SI), R11
  41  	MOVQ   32(SI), R12
  42  	MOVQ   40(SI), R13
  43  	MOVQ   48(SI), R14
  44  	MOVQ   56(SI), R15
      	// Scatter every message word to ten slots, one inside each round's
      	// 64-byte region of the schedule, so that each round step can add four
      	// words with a single aligned PADDL from memory.
      	// Message word 0 (low half of R8).
  45  	MOVL   R8, 16(BP)
  46  	MOVL   R8, 116(BP)
  47  	MOVL   R8, 164(BP)
  48  	MOVL   R8, 264(BP)
  49  	MOVL   R8, 288(BP)
  50  	MOVL   R8, 344(BP)
  51  	MOVL   R8, 432(BP)
  52  	MOVL   R8, 512(BP)
  53  	MOVL   R8, 540(BP)
  54  	MOVL   R8, 652(BP)
      	// Message word 1 (high half of R8).
  55  	SHRQ   $0x20, R8
  56  	MOVL   R8, 32(BP)
  57  	MOVL   R8, 112(BP)
  58  	MOVL   R8, 200(BP)
  59  	MOVL   R8, 228(BP)
  60  	MOVL   R8, 320(BP)
  61  	MOVL   R8, 380(BP)
  62  	MOVL   R8, 404(BP)
  63  	MOVL   R8, 488(BP)
  64  	MOVL   R8, 568(BP)
  65  	MOVL   R8, 604(BP)
      	// Message word 2 (low half of R9).
  66  	MOVL   R9, 20(BP)
  67  	MOVL   R9, 132(BP)
  68  	MOVL   R9, 168(BP)
  69  	MOVL   R9, 240(BP)
  70  	MOVL   R9, 280(BP)
  71  	MOVL   R9, 336(BP)
  72  	MOVL   R9, 456(BP)
  73  	MOVL   R9, 508(BP)
  74  	MOVL   R9, 576(BP)
  75  	MOVL   R9, 608(BP)
      	// Message word 3 (high half of R9).
  76  	SHRQ   $0x20, R9
  77  	MOVL   R9, 36(BP)
  78  	MOVL   R9, 140(BP)
  79  	MOVL   R9, 180(BP)
  80  	MOVL   R9, 212(BP)
  81  	MOVL   R9, 316(BP)
  82  	MOVL   R9, 364(BP)
  83  	MOVL   R9, 452(BP)
  84  	MOVL   R9, 476(BP)
  85  	MOVL   R9, 552(BP)
  86  	MOVL   R9, 632(BP)
      	// Message word 4 (low half of R10).
  87  	MOVL   R10, 24(BP)
  88  	MOVL   R10, 84(BP)
  89  	MOVL   R10, 204(BP)
  90  	MOVL   R10, 248(BP)
  91  	MOVL   R10, 296(BP)
  92  	MOVL   R10, 368(BP)
  93  	MOVL   R10, 412(BP)
  94  	MOVL   R10, 516(BP)
  95  	MOVL   R10, 584(BP)
  96  	MOVL   R10, 612(BP)
      	// Message word 5 (high half of R10).
  97  	SHRQ   $0x20, R10
  98  	MOVL   R10, 40(BP)
  99  	MOVL   R10, 124(BP)
 100  	MOVL   R10, 152(BP)
 101  	MOVL   R10, 244(BP)
 102  	MOVL   R10, 276(BP)
 103  	MOVL   R10, 388(BP)
 104  	MOVL   R10, 416(BP)
 105  	MOVL   R10, 496(BP)
 106  	MOVL   R10, 588(BP)
 107  	MOVL   R10, 620(BP)
      	// Message word 6 (low half of R11).
 108  	MOVL   R11, 28(BP)
 109  	MOVL   R11, 108(BP)
 110  	MOVL   R11, 196(BP)
 111  	MOVL   R11, 256(BP)
 112  	MOVL   R11, 312(BP)
 113  	MOVL   R11, 340(BP)
 114  	MOVL   R11, 436(BP)
 115  	MOVL   R11, 520(BP)
 116  	MOVL   R11, 528(BP)
 117  	MOVL   R11, 616(BP)
      	// Message word 7 (high half of R11).
 118  	SHRQ   $0x20, R11
 119  	MOVL   R11, 44(BP)
 120  	MOVL   R11, 136(BP)
 121  	MOVL   R11, 184(BP)
 122  	MOVL   R11, 208(BP)
 123  	MOVL   R11, 292(BP)
 124  	MOVL   R11, 372(BP)
 125  	MOVL   R11, 448(BP)
 126  	MOVL   R11, 468(BP)
 127  	MOVL   R11, 580(BP)
 128  	MOVL   R11, 600(BP)
      	// Message word 8 (low half of R12).
 129  	MOVL   R12, 48(BP)
 130  	MOVL   R12, 100(BP)
 131  	MOVL   R12, 160(BP)
 132  	MOVL   R12, 268(BP)
 133  	MOVL   R12, 328(BP)
 134  	MOVL   R12, 348(BP)
 135  	MOVL   R12, 444(BP)
 136  	MOVL   R12, 504(BP)
 137  	MOVL   R12, 556(BP)
 138  	MOVL   R12, 596(BP)
      	// Message word 9 (high half of R12).
 139  	SHRQ   $0x20, R12
 140  	MOVL   R12, 64(BP)
 141  	MOVL   R12, 88(BP)
 142  	MOVL   R12, 188(BP)
 143  	MOVL   R12, 224(BP)
 144  	MOVL   R12, 272(BP)
 145  	MOVL   R12, 396(BP)
 146  	MOVL   R12, 440(BP)
 147  	MOVL   R12, 492(BP)
 148  	MOVL   R12, 548(BP)
 149  	MOVL   R12, 628(BP)
      	// Message word 10 (low half of R13).
 150  	MOVL   R13, 52(BP)
 151  	MOVL   R13, 96(BP)
 152  	MOVL   R13, 176(BP)
 153  	MOVL   R13, 260(BP)
 154  	MOVL   R13, 284(BP)
 155  	MOVL   R13, 356(BP)
 156  	MOVL   R13, 428(BP)
 157  	MOVL   R13, 524(BP)
 158  	MOVL   R13, 572(BP)
 159  	MOVL   R13, 592(BP)
      	// Message word 11 (high half of R13).
 160  	SHRQ   $0x20, R13
 161  	MOVL   R13, 68(BP)
 162  	MOVL   R13, 120(BP)
 163  	MOVL   R13, 144(BP)
 164  	MOVL   R13, 220(BP)
 165  	MOVL   R13, 308(BP)
 166  	MOVL   R13, 360(BP)
 167  	MOVL   R13, 460(BP)
 168  	MOVL   R13, 480(BP)
 169  	MOVL   R13, 536(BP)
 170  	MOVL   R13, 640(BP)
      	// Message word 12 (low half of R14).
 171  	MOVL   R14, 56(BP)
 172  	MOVL   R14, 128(BP)
 173  	MOVL   R14, 148(BP)
 174  	MOVL   R14, 232(BP)
 175  	MOVL   R14, 324(BP)
 176  	MOVL   R14, 352(BP)
 177  	MOVL   R14, 400(BP)
 178  	MOVL   R14, 472(BP)
 179  	MOVL   R14, 560(BP)
 180  	MOVL   R14, 648(BP)
      	// Message word 13 (high half of R14).
 181  	SHRQ   $0x20, R14
 182  	MOVL   R14, 72(BP)
 183  	MOVL   R14, 92(BP)
 184  	MOVL   R14, 172(BP)
 185  	MOVL   R14, 216(BP)
 186  	MOVL   R14, 332(BP)
 187  	MOVL   R14, 384(BP)
 188  	MOVL   R14, 424(BP)
 189  	MOVL   R14, 464(BP)
 190  	MOVL   R14, 564(BP)
 191  	MOVL   R14, 636(BP)
      	// Message word 14 (low half of R15).
 192  	MOVL   R15, 60(BP)
 193  	MOVL   R15, 80(BP)
 194  	MOVL   R15, 192(BP)
 195  	MOVL   R15, 236(BP)
 196  	MOVL   R15, 304(BP)
 197  	MOVL   R15, 392(BP)
 198  	MOVL   R15, 408(BP)
 199  	MOVL   R15, 484(BP)
 200  	MOVL   R15, 532(BP)
 201  	MOVL   R15, 644(BP)
      	// Message word 15 (high half of R15).
 202  	SHRQ   $0x20, R15
 203  	MOVL   R15, 76(BP)
 204  	MOVL   R15, 104(BP)
 205  	MOVL   R15, 156(BP)
 206  	MOVL   R15, 252(BP)
 207  	MOVL   R15, 300(BP)
 208  	MOVL   R15, 376(BP)
 209  	MOVL   R15, 420(BP)
 210  	MOVL   R15, 500(BP)
 211  	MOVL   R15, 544(BP)
 212  	MOVL   R15, 624(BP)
      	// Round 1 of 10, first half. Each half-round applies the same mixing
      	// step to all four 32-bit lanes of rows X4..X7 at once. With no byte
      	// shuffle available in SSE2, every rotation is a shift-left/shift-right
      	// pair combined through the scratch register X8:
      	//   $0x10/$0x10 = rotate by 16, $0x14/$0x0c = rotate right by 12,
      	//   $0x18/$0x08 = rotate right by 8, $0x19/$0x07 = rotate right by 7.
 213  	PADDL  16(BP), X4
 214  	PADDL  X5, X4
 215  	PXOR   X4, X7
 216  	MOVO   X7, X8
 217  	PSLLL  $0x10, X8
 218  	PSRLL  $0x10, X7
 219  	PXOR   X8, X7
 220  	PADDL  X7, X6
 221  	PXOR   X6, X5
 222  	MOVO   X5, X8
 223  	PSLLL  $0x14, X8
 224  	PSRLL  $0x0c, X5
 225  	PXOR   X8, X5
 226  	PADDL  32(BP), X4
 227  	PADDL  X5, X4
 228  	PXOR   X4, X7
 229  	MOVO   X7, X8
 230  	PSLLL  $0x18, X8
 231  	PSRLL  $0x08, X7
 232  	PXOR   X8, X7
 233  	PADDL  X7, X6
 234  	PXOR   X6, X5
 235  	MOVO   X5, X8
 236  	PSLLL  $0x19, X8
 237  	PSRLL  $0x07, X5
 238  	PXOR   X8, X5
      	// Rotate the lanes of rows X5, X6, X7 by one, two and three positions
      	// so the second half-round mixes the diagonals of the state.
 239  	PSHUFL $0x39, X5, X5
 240  	PSHUFL $0x4e, X6, X6
 241  	PSHUFL $0x93, X7, X7
      	// Round 1, second half (rows rotated onto the diagonals).
 242  	PADDL  48(BP), X4
 243  	PADDL  X5, X4
 244  	PXOR   X4, X7
 245  	MOVO   X7, X8
 246  	PSLLL  $0x10, X8
 247  	PSRLL  $0x10, X7
 248  	PXOR   X8, X7
 249  	PADDL  X7, X6
 250  	PXOR   X6, X5
 251  	MOVO   X5, X8
 252  	PSLLL  $0x14, X8
 253  	PSRLL  $0x0c, X5
 254  	PXOR   X8, X5
 255  	PADDL  64(BP), X4
 256  	PADDL  X5, X4
 257  	PXOR   X4, X7
 258  	MOVO   X7, X8
 259  	PSLLL  $0x18, X8
 260  	PSRLL  $0x08, X7
 261  	PXOR   X8, X7
 262  	PADDL  X7, X6
 263  	PXOR   X6, X5
 264  	MOVO   X5, X8
 265  	PSLLL  $0x19, X8
 266  	PSRLL  $0x07, X5
 267  	PXOR   X8, X5
      	// Inverse lane rotation: put the rows back in their original alignment.
 268  	PSHUFL $0x39, X7, X7
 269  	PSHUFL $0x4e, X6, X6
 270  	PSHUFL $0x93, X5, X5
      	// Round 2 of 10, first half.
 271  	PADDL  80(BP), X4
 272  	PADDL  X5, X4
 273  	PXOR   X4, X7
 274  	MOVO   X7, X8
 275  	PSLLL  $0x10, X8
 276  	PSRLL  $0x10, X7
 277  	PXOR   X8, X7
 278  	PADDL  X7, X6
 279  	PXOR   X6, X5
 280  	MOVO   X5, X8
 281  	PSLLL  $0x14, X8
 282  	PSRLL  $0x0c, X5
 283  	PXOR   X8, X5
 284  	PADDL  96(BP), X4
 285  	PADDL  X5, X4
 286  	PXOR   X4, X7
 287  	MOVO   X7, X8
 288  	PSLLL  $0x18, X8
 289  	PSRLL  $0x08, X7
 290  	PXOR   X8, X7
 291  	PADDL  X7, X6
 292  	PXOR   X6, X5
 293  	MOVO   X5, X8
 294  	PSLLL  $0x19, X8
 295  	PSRLL  $0x07, X5
 296  	PXOR   X8, X5
 297  	PSHUFL $0x39, X5, X5
 298  	PSHUFL $0x4e, X6, X6
 299  	PSHUFL $0x93, X7, X7
      	// Round 2, second half.
 300  	PADDL  112(BP), X4
 301  	PADDL  X5, X4
 302  	PXOR   X4, X7
 303  	MOVO   X7, X8
 304  	PSLLL  $0x10, X8
 305  	PSRLL  $0x10, X7
 306  	PXOR   X8, X7
 307  	PADDL  X7, X6
 308  	PXOR   X6, X5
 309  	MOVO   X5, X8
 310  	PSLLL  $0x14, X8
 311  	PSRLL  $0x0c, X5
 312  	PXOR   X8, X5
 313  	PADDL  128(BP), X4
 314  	PADDL  X5, X4
 315  	PXOR   X4, X7
 316  	MOVO   X7, X8
 317  	PSLLL  $0x18, X8
 318  	PSRLL  $0x08, X7
 319  	PXOR   X8, X7
 320  	PADDL  X7, X6
 321  	PXOR   X6, X5
 322  	MOVO   X5, X8
 323  	PSLLL  $0x19, X8
 324  	PSRLL  $0x07, X5
 325  	PXOR   X8, X5
 326  	PSHUFL $0x39, X7, X7
 327  	PSHUFL $0x4e, X6, X6
 328  	PSHUFL $0x93, X5, X5
      	// Round 3 of 10, first half.
 329  	PADDL  144(BP), X4
 330  	PADDL  X5, X4
 331  	PXOR   X4, X7
 332  	MOVO   X7, X8
 333  	PSLLL  $0x10, X8
 334  	PSRLL  $0x10, X7
 335  	PXOR   X8, X7
 336  	PADDL  X7, X6
 337  	PXOR   X6, X5
 338  	MOVO   X5, X8
 339  	PSLLL  $0x14, X8
 340  	PSRLL  $0x0c, X5
 341  	PXOR   X8, X5
 342  	PADDL  160(BP), X4
 343  	PADDL  X5, X4
 344  	PXOR   X4, X7
 345  	MOVO   X7, X8
 346  	PSLLL  $0x18, X8
 347  	PSRLL  $0x08, X7
 348  	PXOR   X8, X7
 349  	PADDL  X7, X6
 350  	PXOR   X6, X5
 351  	MOVO   X5, X8
 352  	PSLLL  $0x19, X8
 353  	PSRLL  $0x07, X5
 354  	PXOR   X8, X5
 355  	PSHUFL $0x39, X5, X5
 356  	PSHUFL $0x4e, X6, X6
 357  	PSHUFL $0x93, X7, X7
      	// Round 3, second half.
 358  	PADDL  176(BP), X4
 359  	PADDL  X5, X4
 360  	PXOR   X4, X7
 361  	MOVO   X7, X8
 362  	PSLLL  $0x10, X8
 363  	PSRLL  $0x10, X7
 364  	PXOR   X8, X7
 365  	PADDL  X7, X6
 366  	PXOR   X6, X5
 367  	MOVO   X5, X8
 368  	PSLLL  $0x14, X8
 369  	PSRLL  $0x0c, X5
 370  	PXOR   X8, X5
 371  	PADDL  192(BP), X4
 372  	PADDL  X5, X4
 373  	PXOR   X4, X7
 374  	MOVO   X7, X8
 375  	PSLLL  $0x18, X8
 376  	PSRLL  $0x08, X7
 377  	PXOR   X8, X7
 378  	PADDL  X7, X6
 379  	PXOR   X6, X5
 380  	MOVO   X5, X8
 381  	PSLLL  $0x19, X8
 382  	PSRLL  $0x07, X5
 383  	PXOR   X8, X5
 384  	PSHUFL $0x39, X7, X7
 385  	PSHUFL $0x4e, X6, X6
 386  	PSHUFL $0x93, X5, X5
      	// Round 4 of 10, first half.
 387  	PADDL  208(BP), X4
 388  	PADDL  X5, X4
 389  	PXOR   X4, X7
 390  	MOVO   X7, X8
 391  	PSLLL  $0x10, X8
 392  	PSRLL  $0x10, X7
 393  	PXOR   X8, X7
 394  	PADDL  X7, X6
 395  	PXOR   X6, X5
 396  	MOVO   X5, X8
 397  	PSLLL  $0x14, X8
 398  	PSRLL  $0x0c, X5
 399  	PXOR   X8, X5
 400  	PADDL  224(BP), X4
 401  	PADDL  X5, X4
 402  	PXOR   X4, X7
 403  	MOVO   X7, X8
 404  	PSLLL  $0x18, X8
 405  	PSRLL  $0x08, X7
 406  	PXOR   X8, X7
 407  	PADDL  X7, X6
 408  	PXOR   X6, X5
 409  	MOVO   X5, X8
 410  	PSLLL  $0x19, X8
 411  	PSRLL  $0x07, X5
 412  	PXOR   X8, X5
 413  	PSHUFL $0x39, X5, X5
 414  	PSHUFL $0x4e, X6, X6
 415  	PSHUFL $0x93, X7, X7
      	// Round 4, second half.
 416  	PADDL  240(BP), X4
 417  	PADDL  X5, X4
 418  	PXOR   X4, X7
 419  	MOVO   X7, X8
 420  	PSLLL  $0x10, X8
 421  	PSRLL  $0x10, X7
 422  	PXOR   X8, X7
 423  	PADDL  X7, X6
 424  	PXOR   X6, X5
 425  	MOVO   X5, X8
 426  	PSLLL  $0x14, X8
 427  	PSRLL  $0x0c, X5
 428  	PXOR   X8, X5
 429  	PADDL  256(BP), X4
 430  	PADDL  X5, X4
 431  	PXOR   X4, X7
 432  	MOVO   X7, X8
 433  	PSLLL  $0x18, X8
 434  	PSRLL  $0x08, X7
 435  	PXOR   X8, X7
 436  	PADDL  X7, X6
 437  	PXOR   X6, X5
 438  	MOVO   X5, X8
 439  	PSLLL  $0x19, X8
 440  	PSRLL  $0x07, X5
 441  	PXOR   X8, X5
 442  	PSHUFL $0x39, X7, X7
 443  	PSHUFL $0x4e, X6, X6
 444  	PSHUFL $0x93, X5, X5
      	// Round 5 of 10, first half.
 445  	PADDL  272(BP), X4
 446  	PADDL  X5, X4
 447  	PXOR   X4, X7
 448  	MOVO   X7, X8
 449  	PSLLL  $0x10, X8
 450  	PSRLL  $0x10, X7
 451  	PXOR   X8, X7
 452  	PADDL  X7, X6
 453  	PXOR   X6, X5
 454  	MOVO   X5, X8
 455  	PSLLL  $0x14, X8
 456  	PSRLL  $0x0c, X5
 457  	PXOR   X8, X5
 458  	PADDL  288(BP), X4
 459  	PADDL  X5, X4
 460  	PXOR   X4, X7
 461  	MOVO   X7, X8
 462  	PSLLL  $0x18, X8
 463  	PSRLL  $0x08, X7
 464  	PXOR   X8, X7
 465  	PADDL  X7, X6
 466  	PXOR   X6, X5
 467  	MOVO   X5, X8
 468  	PSLLL  $0x19, X8
 469  	PSRLL  $0x07, X5
 470  	PXOR   X8, X5
 471  	PSHUFL $0x39, X5, X5
 472  	PSHUFL $0x4e, X6, X6
 473  	PSHUFL $0x93, X7, X7
      	// Round 5, second half.
 474  	PADDL  304(BP), X4
 475  	PADDL  X5, X4
 476  	PXOR   X4, X7
 477  	MOVO   X7, X8
 478  	PSLLL  $0x10, X8
 479  	PSRLL  $0x10, X7
 480  	PXOR   X8, X7
 481  	PADDL  X7, X6
 482  	PXOR   X6, X5
 483  	MOVO   X5, X8
 484  	PSLLL  $0x14, X8
 485  	PSRLL  $0x0c, X5
 486  	PXOR   X8, X5
 487  	PADDL  320(BP), X4
 488  	PADDL  X5, X4
 489  	PXOR   X4, X7
 490  	MOVO   X7, X8
 491  	PSLLL  $0x18, X8
 492  	PSRLL  $0x08, X7
 493  	PXOR   X8, X7
 494  	PADDL  X7, X6
 495  	PXOR   X6, X5
 496  	MOVO   X5, X8
 497  	PSLLL  $0x19, X8
 498  	PSRLL  $0x07, X5
 499  	PXOR   X8, X5
 500  	PSHUFL $0x39, X7, X7
 501  	PSHUFL $0x4e, X6, X6
 502  	PSHUFL $0x93, X5, X5
      	// Round 6 of 10, first half.
 503  	PADDL  336(BP), X4
 504  	PADDL  X5, X4
 505  	PXOR   X4, X7
 506  	MOVO   X7, X8
 507  	PSLLL  $0x10, X8
 508  	PSRLL  $0x10, X7
 509  	PXOR   X8, X7
 510  	PADDL  X7, X6
 511  	PXOR   X6, X5
 512  	MOVO   X5, X8
 513  	PSLLL  $0x14, X8
 514  	PSRLL  $0x0c, X5
 515  	PXOR   X8, X5
 516  	PADDL  352(BP), X4
 517  	PADDL  X5, X4
 518  	PXOR   X4, X7
 519  	MOVO   X7, X8
 520  	PSLLL  $0x18, X8
 521  	PSRLL  $0x08, X7
 522  	PXOR   X8, X7
 523  	PADDL  X7, X6
 524  	PXOR   X6, X5
 525  	MOVO   X5, X8
 526  	PSLLL  $0x19, X8
 527  	PSRLL  $0x07, X5
 528  	PXOR   X8, X5
 529  	PSHUFL $0x39, X5, X5
 530  	PSHUFL $0x4e, X6, X6
 531  	PSHUFL $0x93, X7, X7
      	// Round 6, second half.
 532  	PADDL  368(BP), X4
 533  	PADDL  X5, X4
 534  	PXOR   X4, X7
 535  	MOVO   X7, X8
 536  	PSLLL  $0x10, X8
 537  	PSRLL  $0x10, X7
 538  	PXOR   X8, X7
 539  	PADDL  X7, X6
 540  	PXOR   X6, X5
 541  	MOVO   X5, X8
 542  	PSLLL  $0x14, X8
 543  	PSRLL  $0x0c, X5
 544  	PXOR   X8, X5
 545  	PADDL  384(BP), X4
 546  	PADDL  X5, X4
 547  	PXOR   X4, X7
 548  	MOVO   X7, X8
 549  	PSLLL  $0x18, X8
 550  	PSRLL  $0x08, X7
 551  	PXOR   X8, X7
 552  	PADDL  X7, X6
 553  	PXOR   X6, X5
 554  	MOVO   X5, X8
 555  	PSLLL  $0x19, X8
 556  	PSRLL  $0x07, X5
 557  	PXOR   X8, X5
 558  	PSHUFL $0x39, X7, X7
 559  	PSHUFL $0x4e, X6, X6
 560  	PSHUFL $0x93, X5, X5
      	// Round 7 of 10, first half.
 561  	PADDL  400(BP), X4
 562  	PADDL  X5, X4
 563  	PXOR   X4, X7
 564  	MOVO   X7, X8
 565  	PSLLL  $0x10, X8
 566  	PSRLL  $0x10, X7
 567  	PXOR   X8, X7
 568  	PADDL  X7, X6
 569  	PXOR   X6, X5
 570  	MOVO   X5, X8
 571  	PSLLL  $0x14, X8
 572  	PSRLL  $0x0c, X5
 573  	PXOR   X8, X5
 574  	PADDL  416(BP), X4
 575  	PADDL  X5, X4
 576  	PXOR   X4, X7
 577  	MOVO   X7, X8
 578  	PSLLL  $0x18, X8
 579  	PSRLL  $0x08, X7
 580  	PXOR   X8, X7
 581  	PADDL  X7, X6
 582  	PXOR   X6, X5
 583  	MOVO   X5, X8
 584  	PSLLL  $0x19, X8
 585  	PSRLL  $0x07, X5
 586  	PXOR   X8, X5
 587  	PSHUFL $0x39, X5, X5
 588  	PSHUFL $0x4e, X6, X6
 589  	PSHUFL $0x93, X7, X7
      	// Round 7, second half.
 590  	PADDL  432(BP), X4
 591  	PADDL  X5, X4
 592  	PXOR   X4, X7
 593  	MOVO   X7, X8
 594  	PSLLL  $0x10, X8
 595  	PSRLL  $0x10, X7
 596  	PXOR   X8, X7
 597  	PADDL  X7, X6
 598  	PXOR   X6, X5
 599  	MOVO   X5, X8
 600  	PSLLL  $0x14, X8
 601  	PSRLL  $0x0c, X5
 602  	PXOR   X8, X5
 603  	PADDL  448(BP), X4
 604  	PADDL  X5, X4
 605  	PXOR   X4, X7
 606  	MOVO   X7, X8
 607  	PSLLL  $0x18, X8
 608  	PSRLL  $0x08, X7
 609  	PXOR   X8, X7
 610  	PADDL  X7, X6
 611  	PXOR   X6, X5
 612  	MOVO   X5, X8
 613  	PSLLL  $0x19, X8
 614  	PSRLL  $0x07, X5
 615  	PXOR   X8, X5
 616  	PSHUFL $0x39, X7, X7
 617  	PSHUFL $0x4e, X6, X6
 618  	PSHUFL $0x93, X5, X5
      	// Round 8 of 10, first half.
 619  	PADDL  464(BP), X4
 620  	PADDL  X5, X4
 621  	PXOR   X4, X7
 622  	MOVO   X7, X8
 623  	PSLLL  $0x10, X8
 624  	PSRLL  $0x10, X7
 625  	PXOR   X8, X7
 626  	PADDL  X7, X6
 627  	PXOR   X6, X5
 628  	MOVO   X5, X8
 629  	PSLLL  $0x14, X8
 630  	PSRLL  $0x0c, X5
 631  	PXOR   X8, X5
 632  	PADDL  480(BP), X4
 633  	PADDL  X5, X4
 634  	PXOR   X4, X7
 635  	MOVO   X7, X8
 636  	PSLLL  $0x18, X8
 637  	PSRLL  $0x08, X7
 638  	PXOR   X8, X7
 639  	PADDL  X7, X6
 640  	PXOR   X6, X5
 641  	MOVO   X5, X8
 642  	PSLLL  $0x19, X8
 643  	PSRLL  $0x07, X5
 644  	PXOR   X8, X5
 645  	PSHUFL $0x39, X5, X5
 646  	PSHUFL $0x4e, X6, X6
 647  	PSHUFL $0x93, X7, X7
      	// Round 8, second half.
 648  	PADDL  496(BP), X4
 649  	PADDL  X5, X4
 650  	PXOR   X4, X7
 651  	MOVO   X7, X8
 652  	PSLLL  $0x10, X8
 653  	PSRLL  $0x10, X7
 654  	PXOR   X8, X7
 655  	PADDL  X7, X6
 656  	PXOR   X6, X5
 657  	MOVO   X5, X8
 658  	PSLLL  $0x14, X8
 659  	PSRLL  $0x0c, X5
 660  	PXOR   X8, X5
 661  	PADDL  512(BP), X4
 662  	PADDL  X5, X4
 663  	PXOR   X4, X7
 664  	MOVO   X7, X8
 665  	PSLLL  $0x18, X8
 666  	PSRLL  $0x08, X7
 667  	PXOR   X8, X7
 668  	PADDL  X7, X6
 669  	PXOR   X6, X5
 670  	MOVO   X5, X8
 671  	PSLLL  $0x19, X8
 672  	PSRLL  $0x07, X5
 673  	PXOR   X8, X5
 674  	PSHUFL $0x39, X7, X7
 675  	PSHUFL $0x4e, X6, X6
 676  	PSHUFL $0x93, X5, X5
      	// Round 9 of 10, first half.
 677  	PADDL  528(BP), X4
 678  	PADDL  X5, X4
 679  	PXOR   X4, X7
 680  	MOVO   X7, X8
 681  	PSLLL  $0x10, X8
 682  	PSRLL  $0x10, X7
 683  	PXOR   X8, X7
 684  	PADDL  X7, X6
 685  	PXOR   X6, X5
 686  	MOVO   X5, X8
 687  	PSLLL  $0x14, X8
 688  	PSRLL  $0x0c, X5
 689  	PXOR   X8, X5
 690  	PADDL  544(BP), X4
 691  	PADDL  X5, X4
 692  	PXOR   X4, X7
 693  	MOVO   X7, X8
 694  	PSLLL  $0x18, X8
 695  	PSRLL  $0x08, X7
 696  	PXOR   X8, X7
 697  	PADDL  X7, X6
 698  	PXOR   X6, X5
 699  	MOVO   X5, X8
 700  	PSLLL  $0x19, X8
 701  	PSRLL  $0x07, X5
 702  	PXOR   X8, X5
 703  	PSHUFL $0x39, X5, X5
 704  	PSHUFL $0x4e, X6, X6
 705  	PSHUFL $0x93, X7, X7
      	// Round 9, second half.
 706  	PADDL  560(BP), X4
 707  	PADDL  X5, X4
 708  	PXOR   X4, X7
 709  	MOVO   X7, X8
 710  	PSLLL  $0x10, X8
 711  	PSRLL  $0x10, X7
 712  	PXOR   X8, X7
 713  	PADDL  X7, X6
 714  	PXOR   X6, X5
 715  	MOVO   X5, X8
 716  	PSLLL  $0x14, X8
 717  	PSRLL  $0x0c, X5
 718  	PXOR   X8, X5
 719  	PADDL  576(BP), X4
 720  	PADDL  X5, X4
 721  	PXOR   X4, X7
 722  	MOVO   X7, X8
 723  	PSLLL  $0x18, X8
 724  	PSRLL  $0x08, X7
 725  	PXOR   X8, X7
 726  	PADDL  X7, X6
 727  	PXOR   X6, X5
 728  	MOVO   X5, X8
 729  	PSLLL  $0x19, X8
 730  	PSRLL  $0x07, X5
 731  	PXOR   X8, X5
 732  	PSHUFL $0x39, X7, X7
 733  	PSHUFL $0x4e, X6, X6
 734  	PSHUFL $0x93, X5, X5
      	// Round 10 of 10, first half.
 735  	PADDL  592(BP), X4
 736  	PADDL  X5, X4
 737  	PXOR   X4, X7
 738  	MOVO   X7, X8
 739  	PSLLL  $0x10, X8
 740  	PSRLL  $0x10, X7
 741  	PXOR   X8, X7
 742  	PADDL  X7, X6
 743  	PXOR   X6, X5
 744  	MOVO   X5, X8
 745  	PSLLL  $0x14, X8
 746  	PSRLL  $0x0c, X5
 747  	PXOR   X8, X5
 748  	PADDL  608(BP), X4
 749  	PADDL  X5, X4
 750  	PXOR   X4, X7
 751  	MOVO   X7, X8
 752  	PSLLL  $0x18, X8
 753  	PSRLL  $0x08, X7
 754  	PXOR   X8, X7
 755  	PADDL  X7, X6
 756  	PXOR   X6, X5
 757  	MOVO   X5, X8
 758  	PSLLL  $0x19, X8
 759  	PSRLL  $0x07, X5
 760  	PXOR   X8, X5
 761  	PSHUFL $0x39, X5, X5
 762  	PSHUFL $0x4e, X6, X6
 763  	PSHUFL $0x93, X7, X7
      	// Round 10, second half.
 764  	PADDL  624(BP), X4
 765  	PADDL  X5, X4
 766  	PXOR   X4, X7
 767  	MOVO   X7, X8
 768  	PSLLL  $0x10, X8
 769  	PSRLL  $0x10, X7
 770  	PXOR   X8, X7
 771  	PADDL  X7, X6
 772  	PXOR   X6, X5
 773  	MOVO   X5, X8
 774  	PSLLL  $0x14, X8
 775  	PSRLL  $0x0c, X5
 776  	PXOR   X8, X5
 777  	PADDL  640(BP), X4
 778  	PADDL  X5, X4
 779  	PXOR   X4, X7
 780  	MOVO   X7, X8
 781  	PSLLL  $0x18, X8
 782  	PSRLL  $0x08, X7
 783  	PXOR   X8, X7
 784  	PADDL  X7, X6
 785  	PXOR   X6, X5
 786  	MOVO   X5, X8
 787  	PSLLL  $0x19, X8
 788  	PSRLL  $0x07, X5
 789  	PXOR   X8, X5
 790  	PSHUFL $0x39, X7, X7
 791  	PSHUFL $0x4e, X6, X6
 792  	PSHUFL $0x93, X5, X5
      	// Feed-forward: fold all four working rows into the chaining value
      	// (X0 ^= X4 ^ X6, X1 ^= X5 ^ X7).
 793  	PXOR   X4, X0
 794  	PXOR   X5, X1
 795  	PXOR   X6, X0
 796  	PXOR   X7, X1
      	// Advance to the next 64-byte block; repeat until the remaining length is zero.
 797  	LEAQ   64(SI), SI
 798  	SUBQ   $0x40, DX
 799  	JNE    loop
      	// Write the updated 64-bit counter back to *c and the chaining value to *h.
 800  	MOVO   X15, (BP)
 801  	MOVQ   (BP), R9
 802  	MOVQ   R9, (BX)
 803  	MOVOU  X0, (AX)
 804  	MOVOU  X1, 16(AX)
 805  	RET
 806  
      // iv0 holds four 32-bit constants that both hash routines in this file load
      // into X2 and use as row 2 of the working state for every block. The values
      // are words 0..3 of the BLAKE2s initialization vector (RFC 7693).
 807  DATA iv0<>+0(SB)/4, $0x6a09e667
 808  DATA iv0<>+4(SB)/4, $0xbb67ae85
 809  DATA iv0<>+8(SB)/4, $0x3c6ef372
 810  DATA iv0<>+12(SB)/4, $0xa54ff53a
 811  GLOBL iv0<>(SB), RODATA|NOPTR, $16
 812  
      // iv1 holds four 32-bit constants that both hash routines in this file load
      // into X3 and use as row 3 of the working state; the counter/flag vector is
      // XORed into that row for every block. The values are words 4..7 of the
      // BLAKE2s initialization vector (RFC 7693).
 813  DATA iv1<>+0(SB)/4, $0x510e527f
 814  DATA iv1<>+4(SB)/4, $0x9b05688c
 815  DATA iv1<>+8(SB)/4, $0x1f83d9ab
 816  DATA iv1<>+12(SB)/4, $0x5be0cd19
 817  GLOBL iv1<>(SB), RODATA|NOPTR, $16
 818  
      // counter is the per-block increment added to X15 with PADDQ. The low
      // quadword is 0x40 (64, the number of input bytes consumed per loop
      // iteration); the high quadword is zero, so the flag quadword of X15 is
      // left unchanged by the addition.
 819  DATA counter<>+0(SB)/8, $0x0000000000000040
 820  DATA counter<>+8(SB)/8, $0x0000000000000000
 821  GLOBL counter<>(SB), RODATA|NOPTR, $16
 822  
      // rol16 is a PSHUFB control mask. Within every 32-bit lane it selects source
      // bytes 2, 3, 0, 1, which rotates the lane by 16 bits. hashBlocksSSSE3 applies
      // it through X13; hashBlocksSSE2 loads it but performs the same rotation with
      // a PSLLL $0x10 / PSRLL $0x10 pair instead.
 823  DATA rol16<>+0(SB)/8, $0x0504070601000302
 824  DATA rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a
 825  GLOBL rol16<>(SB), RODATA|NOPTR, $16
 826  
      // rol8 is a PSHUFB control mask. Within every 32-bit lane it selects source
      // bytes 1, 2, 3, 0, which rotates the lane right by 8 bits (equivalently left
      // by 24). hashBlocksSSSE3 applies it through X14; hashBlocksSSE2 loads it but
      // performs the same rotation with a PSLLL $0x18 / PSRLL $0x08 pair instead.
 827  DATA rol8<>+0(SB)/8, $0x0407060500030201
 828  DATA rol8<>+8(SB)/8, $0x0c0f0e0d080b0a09
 829  GLOBL rol8<>(SB), RODATA|NOPTR, $16
 830  
 831  // func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
 832  // Requires: SSE2, SSSE3
 833  TEXT ·hashBlocksSSSE3(SB), $672-48
 834  	MOVQ  h+0(FP), AX
 835  	MOVQ  c+8(FP), BX
 836  	MOVL  flag+16(FP), CX
 837  	MOVQ  blocks_base+24(FP), SI
 838  	MOVQ  blocks_len+32(FP), DX
 839  	MOVQ  SP, BP
 840  	ADDQ  $0x0f, BP
 841  	ANDQ  $-16, BP
 842  	MOVQ  (BX), R9
 843  	MOVQ  R9, (BP)
 844  	MOVQ  CX, 8(BP)
 845  	MOVOU (AX), X0
 846  	MOVOU 16(AX), X1
 847  	MOVOU iv0<>+0(SB), X2
 848  	MOVOU iv1<>+0(SB), X3
 849  	MOVOU counter<>+0(SB), X12
 850  	MOVOU rol16<>+0(SB), X13
 851  	MOVOU rol8<>+0(SB), X14
 852  	MOVO  (BP), X15
 853  
 854  loop:
 855  	MOVO   X0, X4
 856  	MOVO   X1, X5
 857  	MOVO   X2, X6
 858  	MOVO   X3, X7
 859  	PADDQ  X12, X15
 860  	PXOR   X15, X7
 861  	MOVQ   (SI), R8
 862  	MOVQ   8(SI), R9
 863  	MOVQ   16(SI), R10
 864  	MOVQ   24(SI), R11
 865  	MOVQ   32(SI), R12
 866  	MOVQ   40(SI), R13
 867  	MOVQ   48(SI), R14
 868  	MOVQ   56(SI), R15
 869  	MOVL   R8, 16(BP)
 870  	MOVL   R8, 116(BP)
 871  	MOVL   R8, 164(BP)
 872  	MOVL   R8, 264(BP)
 873  	MOVL   R8, 288(BP)
 874  	MOVL   R8, 344(BP)
 875  	MOVL   R8, 432(BP)
 876  	MOVL   R8, 512(BP)
 877  	MOVL   R8, 540(BP)
 878  	MOVL   R8, 652(BP)
 879  	SHRQ   $0x20, R8
 880  	MOVL   R8, 32(BP)
 881  	MOVL   R8, 112(BP)
 882  	MOVL   R8, 200(BP)
 883  	MOVL   R8, 228(BP)
 884  	MOVL   R8, 320(BP)
 885  	MOVL   R8, 380(BP)
 886  	MOVL   R8, 404(BP)
 887  	MOVL   R8, 488(BP)
 888  	MOVL   R8, 568(BP)
 889  	MOVL   R8, 604(BP)
 890  	MOVL   R9, 20(BP)
 891  	MOVL   R9, 132(BP)
 892  	MOVL   R9, 168(BP)
 893  	MOVL   R9, 240(BP)
 894  	MOVL   R9, 280(BP)
 895  	MOVL   R9, 336(BP)
 896  	MOVL   R9, 456(BP)
 897  	MOVL   R9, 508(BP)
 898  	MOVL   R9, 576(BP)
 899  	MOVL   R9, 608(BP)
 900  	SHRQ   $0x20, R9
 901  	MOVL   R9, 36(BP)
 902  	MOVL   R9, 140(BP)
 903  	MOVL   R9, 180(BP)
 904  	MOVL   R9, 212(BP)
 905  	MOVL   R9, 316(BP)
 906  	MOVL   R9, 364(BP)
 907  	MOVL   R9, 452(BP)
 908  	MOVL   R9, 476(BP)
 909  	MOVL   R9, 552(BP)
 910  	MOVL   R9, 632(BP)
 911  	MOVL   R10, 24(BP)
 912  	MOVL   R10, 84(BP)
 913  	MOVL   R10, 204(BP)
 914  	MOVL   R10, 248(BP)
 915  	MOVL   R10, 296(BP)
 916  	MOVL   R10, 368(BP)
 917  	MOVL   R10, 412(BP)
 918  	MOVL   R10, 516(BP)
 919  	MOVL   R10, 584(BP)
 920  	MOVL   R10, 612(BP)
 921  	SHRQ   $0x20, R10
 922  	MOVL   R10, 40(BP)
 923  	MOVL   R10, 124(BP)
 924  	MOVL   R10, 152(BP)
 925  	MOVL   R10, 244(BP)
 926  	MOVL   R10, 276(BP)
 927  	MOVL   R10, 388(BP)
 928  	MOVL   R10, 416(BP)
 929  	MOVL   R10, 496(BP)
 930  	MOVL   R10, 588(BP)
 931  	MOVL   R10, 620(BP)
 932  	MOVL   R11, 28(BP)
 933  	MOVL   R11, 108(BP)
 934  	MOVL   R11, 196(BP)
 935  	MOVL   R11, 256(BP)
 936  	MOVL   R11, 312(BP)
 937  	MOVL   R11, 340(BP)
 938  	MOVL   R11, 436(BP)
 939  	MOVL   R11, 520(BP)
 940  	MOVL   R11, 528(BP)
 941  	MOVL   R11, 616(BP)
 942  	SHRQ   $0x20, R11
 943  	MOVL   R11, 44(BP)
 944  	MOVL   R11, 136(BP)
 945  	MOVL   R11, 184(BP)
 946  	MOVL   R11, 208(BP)
 947  	MOVL   R11, 292(BP)
 948  	MOVL   R11, 372(BP)
 949  	MOVL   R11, 448(BP)
 950  	MOVL   R11, 468(BP)
 951  	MOVL   R11, 580(BP)
 952  	MOVL   R11, 600(BP)
 953  	MOVL   R12, 48(BP)
 954  	MOVL   R12, 100(BP)
 955  	MOVL   R12, 160(BP)
 956  	MOVL   R12, 268(BP)
 957  	MOVL   R12, 328(BP)
 958  	MOVL   R12, 348(BP)
 959  	MOVL   R12, 444(BP)
 960  	MOVL   R12, 504(BP)
 961  	MOVL   R12, 556(BP)
 962  	MOVL   R12, 596(BP)
 963  	SHRQ   $0x20, R12
 964  	MOVL   R12, 64(BP)
 965  	MOVL   R12, 88(BP)
 966  	MOVL   R12, 188(BP)
 967  	MOVL   R12, 224(BP)
 968  	MOVL   R12, 272(BP)
 969  	MOVL   R12, 396(BP)
 970  	MOVL   R12, 440(BP)
 971  	MOVL   R12, 492(BP)
 972  	MOVL   R12, 548(BP)
 973  	MOVL   R12, 628(BP)
 974  	MOVL   R13, 52(BP)
 975  	MOVL   R13, 96(BP)
 976  	MOVL   R13, 176(BP)
 977  	MOVL   R13, 260(BP)
 978  	MOVL   R13, 284(BP)
 979  	MOVL   R13, 356(BP)
 980  	MOVL   R13, 428(BP)
 981  	MOVL   R13, 524(BP)
 982  	MOVL   R13, 572(BP)
 983  	MOVL   R13, 592(BP)
 984  	SHRQ   $0x20, R13
 985  	MOVL   R13, 68(BP)
 986  	MOVL   R13, 120(BP)
 987  	MOVL   R13, 144(BP)
 988  	MOVL   R13, 220(BP)
 989  	MOVL   R13, 308(BP)
 990  	MOVL   R13, 360(BP)
 991  	MOVL   R13, 460(BP)
 992  	MOVL   R13, 480(BP)
 993  	MOVL   R13, 536(BP)
 994  	MOVL   R13, 640(BP)
 995  	MOVL   R14, 56(BP)
 996  	MOVL   R14, 128(BP)
 997  	MOVL   R14, 148(BP)
 998  	MOVL   R14, 232(BP)
 999  	MOVL   R14, 324(BP)
1000  	MOVL   R14, 352(BP)
1001  	MOVL   R14, 400(BP)
1002  	MOVL   R14, 472(BP)
1003  	MOVL   R14, 560(BP)
1004  	MOVL   R14, 648(BP)
1005  	SHRQ   $0x20, R14
1006  	MOVL   R14, 72(BP)
1007  	MOVL   R14, 92(BP)
1008  	MOVL   R14, 172(BP)
1009  	MOVL   R14, 216(BP)
1010  	MOVL   R14, 332(BP)
1011  	MOVL   R14, 384(BP)
1012  	MOVL   R14, 424(BP)
1013  	MOVL   R14, 464(BP)
1014  	MOVL   R14, 564(BP)
1015  	MOVL   R14, 636(BP)
1016  	MOVL   R15, 60(BP)
1017  	MOVL   R15, 80(BP)
1018  	MOVL   R15, 192(BP)
1019  	MOVL   R15, 236(BP)
1020  	MOVL   R15, 304(BP)
1021  	MOVL   R15, 392(BP)
1022  	MOVL   R15, 408(BP)
1023  	MOVL   R15, 484(BP)
1024  	MOVL   R15, 532(BP)
1025  	MOVL   R15, 644(BP)
1026  	SHRQ   $0x20, R15
1027  	MOVL   R15, 76(BP)
1028  	MOVL   R15, 104(BP)
1029  	MOVL   R15, 156(BP)
1030  	MOVL   R15, 252(BP)
1031  	MOVL   R15, 300(BP)
1032  	MOVL   R15, 376(BP)
1033  	MOVL   R15, 420(BP)
1034  	MOVL   R15, 500(BP)
1035  	MOVL   R15, 544(BP)
1036  	MOVL   R15, 624(BP)
1037  	PADDL  16(BP), X4
1038  	PADDL  X5, X4
1039  	PXOR   X4, X7
1040  	PSHUFB X13, X7
1041  	PADDL  X7, X6
1042  	PXOR   X6, X5
1043  	MOVO   X5, X8
1044  	PSLLL  $0x14, X8
1045  	PSRLL  $0x0c, X5
1046  	PXOR   X8, X5
1047  	PADDL  32(BP), X4
1048  	PADDL  X5, X4
1049  	PXOR   X4, X7
1050  	PSHUFB X14, X7
1051  	PADDL  X7, X6
1052  	PXOR   X6, X5
1053  	MOVO   X5, X8
1054  	PSLLL  $0x19, X8
1055  	PSRLL  $0x07, X5
1056  	PXOR   X8, X5
1057  	PSHUFL $0x39, X5, X5
1058  	PSHUFL $0x4e, X6, X6
1059  	PSHUFL $0x93, X7, X7
1060  	PADDL  48(BP), X4
1061  	PADDL  X5, X4
1062  	PXOR   X4, X7
1063  	PSHUFB X13, X7
1064  	PADDL  X7, X6
1065  	PXOR   X6, X5
1066  	MOVO   X5, X8
1067  	PSLLL  $0x14, X8
1068  	PSRLL  $0x0c, X5
1069  	PXOR   X8, X5
1070  	PADDL  64(BP), X4
1071  	PADDL  X5, X4
1072  	PXOR   X4, X7
1073  	PSHUFB X14, X7
1074  	PADDL  X7, X6
1075  	PXOR   X6, X5
1076  	MOVO   X5, X8
1077  	PSLLL  $0x19, X8
1078  	PSRLL  $0x07, X5
1079  	PXOR   X8, X5
1080  	PSHUFL $0x39, X7, X7
1081  	PSHUFL $0x4e, X6, X6
1082  	PSHUFL $0x93, X5, X5
1083  	PADDL  80(BP), X4
1084  	PADDL  X5, X4
1085  	PXOR   X4, X7
1086  	PSHUFB X13, X7
1087  	PADDL  X7, X6
1088  	PXOR   X6, X5
1089  	MOVO   X5, X8
1090  	PSLLL  $0x14, X8
1091  	PSRLL  $0x0c, X5
1092  	PXOR   X8, X5
1093  	PADDL  96(BP), X4
1094  	PADDL  X5, X4
1095  	PXOR   X4, X7
1096  	PSHUFB X14, X7
1097  	PADDL  X7, X6
1098  	PXOR   X6, X5
1099  	MOVO   X5, X8
1100  	PSLLL  $0x19, X8
1101  	PSRLL  $0x07, X5
1102  	PXOR   X8, X5
1103  	PSHUFL $0x39, X5, X5
1104  	PSHUFL $0x4e, X6, X6
1105  	PSHUFL $0x93, X7, X7
1106  	PADDL  112(BP), X4
1107  	PADDL  X5, X4
1108  	PXOR   X4, X7
1109  	PSHUFB X13, X7
1110  	PADDL  X7, X6
1111  	PXOR   X6, X5
1112  	MOVO   X5, X8
1113  	PSLLL  $0x14, X8
1114  	PSRLL  $0x0c, X5
1115  	PXOR   X8, X5
1116  	PADDL  128(BP), X4
1117  	PADDL  X5, X4
1118  	PXOR   X4, X7
1119  	PSHUFB X14, X7
1120  	PADDL  X7, X6
1121  	PXOR   X6, X5
1122  	MOVO   X5, X8
1123  	PSLLL  $0x19, X8
1124  	PSRLL  $0x07, X5
1125  	PXOR   X8, X5
1126  	PSHUFL $0x39, X7, X7
1127  	PSHUFL $0x4e, X6, X6
1128  	PSHUFL $0x93, X5, X5
1129  	PADDL  144(BP), X4
1130  	PADDL  X5, X4
1131  	PXOR   X4, X7
1132  	PSHUFB X13, X7
1133  	PADDL  X7, X6
1134  	PXOR   X6, X5
1135  	MOVO   X5, X8
1136  	PSLLL  $0x14, X8
1137  	PSRLL  $0x0c, X5
1138  	PXOR   X8, X5
1139  	PADDL  160(BP), X4
1140  	PADDL  X5, X4
1141  	PXOR   X4, X7
1142  	PSHUFB X14, X7
1143  	PADDL  X7, X6
1144  	PXOR   X6, X5
1145  	MOVO   X5, X8
1146  	PSLLL  $0x19, X8
1147  	PSRLL  $0x07, X5
1148  	PXOR   X8, X5
1149  	PSHUFL $0x39, X5, X5
1150  	PSHUFL $0x4e, X6, X6
1151  	PSHUFL $0x93, X7, X7
1152  	PADDL  176(BP), X4
1153  	PADDL  X5, X4
1154  	PXOR   X4, X7
1155  	PSHUFB X13, X7
1156  	PADDL  X7, X6
1157  	PXOR   X6, X5
1158  	MOVO   X5, X8
1159  	PSLLL  $0x14, X8
1160  	PSRLL  $0x0c, X5
1161  	PXOR   X8, X5
1162  	PADDL  192(BP), X4
1163  	PADDL  X5, X4
1164  	PXOR   X4, X7
1165  	PSHUFB X14, X7
1166  	PADDL  X7, X6
1167  	PXOR   X6, X5
1168  	MOVO   X5, X8
1169  	PSLLL  $0x19, X8
1170  	PSRLL  $0x07, X5
1171  	PXOR   X8, X5
1172  	PSHUFL $0x39, X7, X7
1173  	PSHUFL $0x4e, X6, X6
1174  	PSHUFL $0x93, X5, X5
1175  	PADDL  208(BP), X4
1176  	PADDL  X5, X4
1177  	PXOR   X4, X7
1178  	PSHUFB X13, X7
1179  	PADDL  X7, X6
1180  	PXOR   X6, X5
1181  	MOVO   X5, X8
1182  	PSLLL  $0x14, X8
1183  	PSRLL  $0x0c, X5
1184  	PXOR   X8, X5
1185  	PADDL  224(BP), X4
1186  	PADDL  X5, X4
1187  	PXOR   X4, X7
1188  	PSHUFB X14, X7
1189  	PADDL  X7, X6
1190  	PXOR   X6, X5
1191  	MOVO   X5, X8
1192  	PSLLL  $0x19, X8
1193  	PSRLL  $0x07, X5
1194  	PXOR   X8, X5
1195  	PSHUFL $0x39, X5, X5
1196  	PSHUFL $0x4e, X6, X6
1197  	PSHUFL $0x93, X7, X7
1198  	PADDL  240(BP), X4
1199  	PADDL  X5, X4
1200  	PXOR   X4, X7
1201  	PSHUFB X13, X7
1202  	PADDL  X7, X6
1203  	PXOR   X6, X5
1204  	MOVO   X5, X8
1205  	PSLLL  $0x14, X8
1206  	PSRLL  $0x0c, X5
1207  	PXOR   X8, X5
1208  	PADDL  256(BP), X4
1209  	PADDL  X5, X4
1210  	PXOR   X4, X7
1211  	PSHUFB X14, X7
1212  	PADDL  X7, X6
1213  	PXOR   X6, X5
1214  	MOVO   X5, X8
1215  	PSLLL  $0x19, X8
1216  	PSRLL  $0x07, X5
1217  	PXOR   X8, X5
1218  	PSHUFL $0x39, X7, X7
1219  	PSHUFL $0x4e, X6, X6
1220  	PSHUFL $0x93, X5, X5
1221  	PADDL  272(BP), X4
1222  	PADDL  X5, X4
1223  	PXOR   X4, X7
1224  	PSHUFB X13, X7
1225  	PADDL  X7, X6
1226  	PXOR   X6, X5
1227  	MOVO   X5, X8
1228  	PSLLL  $0x14, X8
1229  	PSRLL  $0x0c, X5
1230  	PXOR   X8, X5
1231  	PADDL  288(BP), X4
1232  	PADDL  X5, X4
1233  	PXOR   X4, X7
1234  	PSHUFB X14, X7
1235  	PADDL  X7, X6
1236  	PXOR   X6, X5
1237  	MOVO   X5, X8
1238  	PSLLL  $0x19, X8
1239  	PSRLL  $0x07, X5
1240  	PXOR   X8, X5
1241  	PSHUFL $0x39, X5, X5
1242  	PSHUFL $0x4e, X6, X6
1243  	PSHUFL $0x93, X7, X7
1244  	PADDL  304(BP), X4
1245  	PADDL  X5, X4
1246  	PXOR   X4, X7
1247  	PSHUFB X13, X7
1248  	PADDL  X7, X6
1249  	PXOR   X6, X5
1250  	MOVO   X5, X8
1251  	PSLLL  $0x14, X8
1252  	PSRLL  $0x0c, X5
1253  	PXOR   X8, X5
1254  	PADDL  320(BP), X4
1255  	PADDL  X5, X4
1256  	PXOR   X4, X7
1257  	PSHUFB X14, X7
1258  	PADDL  X7, X6
1259  	PXOR   X6, X5
1260  	MOVO   X5, X8
1261  	PSLLL  $0x19, X8
1262  	PSRLL  $0x07, X5
1263  	PXOR   X8, X5
1264  	PSHUFL $0x39, X7, X7
1265  	PSHUFL $0x4e, X6, X6
1266  	PSHUFL $0x93, X5, X5
1267  	PADDL  336(BP), X4
1268  	PADDL  X5, X4
1269  	PXOR   X4, X7
1270  	PSHUFB X13, X7
1271  	PADDL  X7, X6
1272  	PXOR   X6, X5
1273  	MOVO   X5, X8
1274  	PSLLL  $0x14, X8
1275  	PSRLL  $0x0c, X5
1276  	PXOR   X8, X5
1277  	PADDL  352(BP), X4
1278  	PADDL  X5, X4
1279  	PXOR   X4, X7
1280  	PSHUFB X14, X7
1281  	PADDL  X7, X6
1282  	PXOR   X6, X5
1283  	MOVO   X5, X8
1284  	PSLLL  $0x19, X8
1285  	PSRLL  $0x07, X5
1286  	PXOR   X8, X5
1287  	PSHUFL $0x39, X5, X5
1288  	PSHUFL $0x4e, X6, X6
1289  	PSHUFL $0x93, X7, X7
1290  	PADDL  368(BP), X4
1291  	PADDL  X5, X4
1292  	PXOR   X4, X7
1293  	PSHUFB X13, X7
1294  	PADDL  X7, X6
1295  	PXOR   X6, X5
1296  	MOVO   X5, X8
1297  	PSLLL  $0x14, X8
1298  	PSRLL  $0x0c, X5
1299  	PXOR   X8, X5
1300  	PADDL  384(BP), X4
1301  	PADDL  X5, X4
1302  	PXOR   X4, X7
1303  	PSHUFB X14, X7
1304  	PADDL  X7, X6
1305  	PXOR   X6, X5
1306  	MOVO   X5, X8
1307  	PSLLL  $0x19, X8
1308  	PSRLL  $0x07, X5
1309  	PXOR   X8, X5
1310  	PSHUFL $0x39, X7, X7
1311  	PSHUFL $0x4e, X6, X6
1312  	PSHUFL $0x93, X5, X5
1313  	PADDL  400(BP), X4
1314  	PADDL  X5, X4
1315  	PXOR   X4, X7
1316  	PSHUFB X13, X7
1317  	PADDL  X7, X6
1318  	PXOR   X6, X5
1319  	MOVO   X5, X8
1320  	PSLLL  $0x14, X8
1321  	PSRLL  $0x0c, X5
1322  	PXOR   X8, X5
1323  	PADDL  416(BP), X4
1324  	PADDL  X5, X4
1325  	PXOR   X4, X7
1326  	PSHUFB X14, X7
1327  	PADDL  X7, X6
1328  	PXOR   X6, X5
1329  	MOVO   X5, X8
1330  	PSLLL  $0x19, X8
1331  	PSRLL  $0x07, X5
1332  	PXOR   X8, X5
1333  	PSHUFL $0x39, X5, X5
1334  	PSHUFL $0x4e, X6, X6
1335  	PSHUFL $0x93, X7, X7
1336  	PADDL  432(BP), X4
1337  	PADDL  X5, X4
1338  	PXOR   X4, X7
1339  	PSHUFB X13, X7
1340  	PADDL  X7, X6
1341  	PXOR   X6, X5
1342  	MOVO   X5, X8
1343  	PSLLL  $0x14, X8
1344  	PSRLL  $0x0c, X5
1345  	PXOR   X8, X5
1346  	PADDL  448(BP), X4
1347  	PADDL  X5, X4
1348  	PXOR   X4, X7
1349  	PSHUFB X14, X7
1350  	PADDL  X7, X6
1351  	PXOR   X6, X5
1352  	MOVO   X5, X8
1353  	PSLLL  $0x19, X8
1354  	PSRLL  $0x07, X5
1355  	PXOR   X8, X5
1356  	PSHUFL $0x39, X7, X7
1357  	PSHUFL $0x4e, X6, X6
1358  	PSHUFL $0x93, X5, X5
1359  	PADDL  464(BP), X4
1360  	PADDL  X5, X4
1361  	PXOR   X4, X7
1362  	PSHUFB X13, X7
1363  	PADDL  X7, X6
1364  	PXOR   X6, X5
1365  	MOVO   X5, X8
1366  	PSLLL  $0x14, X8
1367  	PSRLL  $0x0c, X5
1368  	PXOR   X8, X5
1369  	PADDL  480(BP), X4
1370  	PADDL  X5, X4
1371  	PXOR   X4, X7
1372  	PSHUFB X14, X7
1373  	PADDL  X7, X6
1374  	PXOR   X6, X5
1375  	MOVO   X5, X8
1376  	PSLLL  $0x19, X8
1377  	PSRLL  $0x07, X5
1378  	PXOR   X8, X5
1379  	PSHUFL $0x39, X5, X5
1380  	PSHUFL $0x4e, X6, X6
1381  	PSHUFL $0x93, X7, X7
1382  	PADDL  496(BP), X4
1383  	PADDL  X5, X4
1384  	PXOR   X4, X7
1385  	PSHUFB X13, X7
1386  	PADDL  X7, X6
1387  	PXOR   X6, X5
1388  	MOVO   X5, X8
1389  	PSLLL  $0x14, X8
1390  	PSRLL  $0x0c, X5
1391  	PXOR   X8, X5
1392  	PADDL  512(BP), X4
1393  	PADDL  X5, X4
1394  	PXOR   X4, X7
1395  	PSHUFB X14, X7
1396  	PADDL  X7, X6
1397  	PXOR   X6, X5
1398  	MOVO   X5, X8
1399  	PSLLL  $0x19, X8
1400  	PSRLL  $0x07, X5
1401  	PXOR   X8, X5
1402  	PSHUFL $0x39, X7, X7
1403  	PSHUFL $0x4e, X6, X6
1404  	PSHUFL $0x93, X5, X5
1405  	PADDL  528(BP), X4
1406  	PADDL  X5, X4
1407  	PXOR   X4, X7
1408  	PSHUFB X13, X7
1409  	PADDL  X7, X6
1410  	PXOR   X6, X5
1411  	MOVO   X5, X8
1412  	PSLLL  $0x14, X8
1413  	PSRLL  $0x0c, X5
1414  	PXOR   X8, X5
1415  	PADDL  544(BP), X4
1416  	PADDL  X5, X4
1417  	PXOR   X4, X7
1418  	PSHUFB X14, X7
1419  	PADDL  X7, X6
1420  	PXOR   X6, X5
1421  	MOVO   X5, X8
1422  	PSLLL  $0x19, X8
1423  	PSRLL  $0x07, X5
1424  	PXOR   X8, X5
1425  	PSHUFL $0x39, X5, X5
1426  	PSHUFL $0x4e, X6, X6
1427  	PSHUFL $0x93, X7, X7
1428  	PADDL  560(BP), X4
1429  	PADDL  X5, X4
1430  	PXOR   X4, X7
1431  	PSHUFB X13, X7
1432  	PADDL  X7, X6
1433  	PXOR   X6, X5
1434  	MOVO   X5, X8
1435  	PSLLL  $0x14, X8
1436  	PSRLL  $0x0c, X5
1437  	PXOR   X8, X5
1438  	PADDL  576(BP), X4
1439  	PADDL  X5, X4
1440  	PXOR   X4, X7
1441  	PSHUFB X14, X7
1442  	PADDL  X7, X6
1443  	PXOR   X6, X5
1444  	MOVO   X5, X8
1445  	PSLLL  $0x19, X8
1446  	PSRLL  $0x07, X5
1447  	PXOR   X8, X5
1448  	PSHUFL $0x39, X7, X7
1449  	PSHUFL $0x4e, X6, X6
1450  	PSHUFL $0x93, X5, X5
1451  	PADDL  592(BP), X4
1452  	PADDL  X5, X4
1453  	PXOR   X4, X7
1454  	PSHUFB X13, X7
1455  	PADDL  X7, X6
1456  	PXOR   X6, X5
1457  	MOVO   X5, X8
1458  	PSLLL  $0x14, X8
1459  	PSRLL  $0x0c, X5
1460  	PXOR   X8, X5
1461  	PADDL  608(BP), X4
1462  	PADDL  X5, X4
1463  	PXOR   X4, X7
1464  	PSHUFB X14, X7
1465  	PADDL  X7, X6
1466  	PXOR   X6, X5
1467  	MOVO   X5, X8
1468  	PSLLL  $0x19, X8
1469  	PSRLL  $0x07, X5
1470  	PXOR   X8, X5
1471  	PSHUFL $0x39, X5, X5
1472  	PSHUFL $0x4e, X6, X6
1473  	PSHUFL $0x93, X7, X7
1474  	PADDL  624(BP), X4
1475  	PADDL  X5, X4
1476  	PXOR   X4, X7
1477  	PSHUFB X13, X7
1478  	PADDL  X7, X6
1479  	PXOR   X6, X5
1480  	MOVO   X5, X8
1481  	PSLLL  $0x14, X8
1482  	PSRLL  $0x0c, X5
1483  	PXOR   X8, X5
1484  	PADDL  640(BP), X4
1485  	PADDL  X5, X4
1486  	PXOR   X4, X7
1487  	PSHUFB X14, X7
1488  	PADDL  X7, X6
1489  	PXOR   X6, X5
1490  	MOVO   X5, X8
1491  	PSLLL  $0x19, X8
1492  	PSRLL  $0x07, X5
1493  	PXOR   X8, X5
1494  	PSHUFL $0x39, X7, X7
1495  	PSHUFL $0x4e, X6, X6
1496  	PSHUFL $0x93, X5, X5
1497  	PXOR   X4, X0
1498  	PXOR   X5, X1
1499  	PXOR   X6, X0
1500  	PXOR   X7, X1
1501  	LEAQ   64(SI), SI
1502  	SUBQ   $0x40, DX
1503  	JNE    loop
1504  	MOVO   X15, (BP)
1505  	MOVQ   (BP), R9
1506  	MOVQ   R9, (BX)
1507  	MOVOU  X0, (AX)
1508  	MOVOU  X1, 16(AX)
1509  	RET
1510  
// BLAKE2S_SSE4_LOAD4 gathers four 32-bit message words of the block at SI into
// one XMM register: word w0 goes to lane 0 and w1..w3 go to lanes 1..3.
// The w arguments are word indices (0..15); each is scaled by 4 to a byte offset.
#define BLAKE2S_SSE4_LOAD4(dst, w0, w1, w2, w3) \
	MOVL   w0*4(SI), dst;     \
	PINSRD $1, w1*4(SI), dst; \
	PINSRD $2, w2*4(SI), dst; \
	PINSRD $3, w3*4(SI), dst

// BLAKE2S_SSE4_ROTR rotates every 32-bit lane of v right by r bits using a
// shift-left / shift-right / xor sequence. tmp is overwritten.
#define BLAKE2S_SSE4_ROTR(r, tmp, v) \
	MOVO  v, tmp;       \
	PSLLL $(32-r), tmp; \
	PSRLL $r, v;        \
	PXOR  tmp, v

// BLAKE2S_SSE4_G performs one vectorised mixing step on the four state rows
// X4 (a), X5 (b), X6 (c), X7 (d), adding message rows mx and then my.
// X13 and X14 are the PSHUFB masks loaded from rol16<> and rol8<>; the b row is
// rotated right by 12 and then by 7 with X8 as scratch. X8 may also be passed
// as mx because mx is consumed by the first instruction, before X8 is reused.
#define BLAKE2S_SSE4_G(mx, my) \
	PADDL  mx, X4;                 \
	PADDL  X5, X4;                 \
	PXOR   X4, X7;                 \
	PSHUFB X13, X7;                \
	PADDL  X7, X6;                 \
	PXOR   X6, X5;                 \
	BLAKE2S_SSE4_ROTR(12, X8, X5); \
	PADDL  my, X4;                 \
	PADDL  X5, X4;                 \
	PXOR   X4, X7;                 \
	PSHUFB X14, X7;                \
	PADDL  X7, X6;                 \
	PXOR   X6, X5;                 \
	BLAKE2S_SSE4_ROTR(7, X8, X5)

// BLAKE2S_SSE4_ROUND runs one full round. The sixteen arguments are message
// word indices in the order they are gathered: i0..i3 -> X8, i4..i7 -> X9,
// i8..i11 -> X10, i12..i15 -> X11. X8/X9 feed the first mixing step; the rows
// X5, X6, X7 are then lane-rotated (PSHUFL) so the second mixing step, fed by
// X10/X11, works on the diagonals; the final three PSHUFL undo that rotation.
#define BLAKE2S_SSE4_ROUND(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15) \
	BLAKE2S_SSE4_LOAD4(X8, i0, i1, i2, i3);      \
	BLAKE2S_SSE4_LOAD4(X9, i4, i5, i6, i7);      \
	BLAKE2S_SSE4_LOAD4(X10, i8, i9, i10, i11);   \
	BLAKE2S_SSE4_LOAD4(X11, i12, i13, i14, i15); \
	BLAKE2S_SSE4_G(X8, X9);                      \
	PSHUFL $0x39, X5, X5;                        \
	PSHUFL $0x4e, X6, X6;                        \
	PSHUFL $0x93, X7, X7;                        \
	BLAKE2S_SSE4_G(X10, X11);                    \
	PSHUFL $0x39, X7, X7;                        \
	PSHUFL $0x4e, X6, X6;                        \
	PSHUFL $0x93, X5, X5

// func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
// Requires: SSE2, SSE4.1, SSSE3
//
// Compresses every 64-byte block of blocks into the chaining value h and
// writes the advanced counter back to c.
//
// Register roles:
//   AX  = h            BX = c            CX = flag (zero-extended)
//   SI  = current block pointer          DX = bytes still to process
//   BP  = 16-byte aligned scratch slot inside the 32-byte frame
//   X0, X1   = chaining value h[0:4], h[4:8]
//   X2, X3   = iv0<>, iv1<> constants
//   X4..X7   = working state rows for the current block
//   X8..X11  = gathered message words (X8 doubles as rotate scratch)
//   X12      = counter<> constant added to X15 once per block
//   X13, X14 = rol16<>, rol8<> PSHUFB masks
//   X15      = {c[0], c[1], flag, 0}
//
// DX is decreased by 64 per iteration and the loop exits only when it reaches
// exactly zero, so the caller must pass a non-empty length that is a multiple
// of 64.
TEXT ·hashBlocksSSE4(SB), $32-48
	// Fetch the arguments.
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVL flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DX

	// BP = SP rounded up to the next 16-byte boundary, so MOVO may be used on it.
	MOVQ SP, BP
	ADDQ $0x0f, BP
	ANDQ $-16, BP

	// Build {c[0], c[1], flag, 0} in the aligned slot.
	MOVQ (BX), R9
	MOVQ R9, (BP)
	MOVQ CX, 8(BP)

	// Load the chaining value, the constants and the counter/flag vector.
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU iv0<>+0(SB), X2
	MOVOU iv1<>+0(SB), X3
	MOVOU counter<>+0(SB), X12
	MOVOU rol16<>+0(SB), X13
	MOVOU rol8<>+0(SB), X14
	MOVO  (BP), X15

nextBlock:
	// Working state = {h[0:4], h[4:8], iv0, iv1 ^ (counter, flag)}; the
	// counter is advanced before it is mixed in.
	MOVO  X0, X4
	MOVO  X1, X5
	MOVO  X2, X6
	MOVO  X3, X7
	PADDQ X12, X15
	PXOR  X15, X7

	// Ten rounds; each line lists the message word indices for that round.
	BLAKE2S_SSE4_ROUND(0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15)
	BLAKE2S_SSE4_ROUND(14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3)
	BLAKE2S_SSE4_ROUND(11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4)
	BLAKE2S_SSE4_ROUND(7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8)
	BLAKE2S_SSE4_ROUND(9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13)
	BLAKE2S_SSE4_ROUND(2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9)
	BLAKE2S_SSE4_ROUND(12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11)
	BLAKE2S_SSE4_ROUND(13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10)
	BLAKE2S_SSE4_ROUND(6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5)
	BLAKE2S_SSE4_ROUND(10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0)

	// Fold the working state back into the chaining value:
	// X0 ^= X4 ^ X6, X1 ^= X5 ^ X7.
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X0
	PXOR X7, X1

	// Step to the next 64-byte block; continue until no bytes remain.
	LEAQ 64(SI), SI
	SUBQ $0x40, DX
	JNE  nextBlock

	// Write the low 8 bytes of X15 (the advanced counter) back to c, and the
	// updated chaining value back to h.
	MOVO  X15, (BP)
	MOVQ  (BP), R9
	MOVQ  R9, (BX)
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	RET
2174