gcm_amd64.s raw

   1  // Code generated by command: go run gcm_amd64_asm.go -out ../../gcm_amd64.s -pkg aes. DO NOT EDIT.
   2  
   3  //go:build !purego
   4  
   5  #include "textflag.h"
   6  
   7  // func gcmAesFinish(productTable *[256]byte, tagMask *[16]byte, T *[16]byte, pLen uint64, dLen uint64)
   8  // Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // Completes the value held in *T and writes the result back to *T:
      //   1. builds a 16-byte block with pLen*8 in the low qword and dLen*8 in the
      //      high qword and XORs it with *T,
      //   2. carry-less multiplies that block by the table entry at productTable+224
      //      (using the helper entry at +240 for the Karatsuba middle term) and
      //      reduces the 256-bit product with gcmPoly,
      //   3. byte-reverses the reduced value with bswapMask and XORs it with *tagMask.
      // Leaf routine: no local frame, 40 bytes of arguments, nothing returned.
   9  TEXT ·gcmAesFinish(SB), NOSPLIT, $0-40
  10  	MOVQ      productTable+0(FP), DI // DI = productTable
  11  	MOVQ      tagMask+8(FP), SI // SI = tagMask
  12  	MOVQ      T+16(FP), DX // DX = T (read here, written at the end)
  13  	MOVQ      pLen+24(FP), AX
  14  	MOVQ      dLen+32(FP), CX
  15  	MOVOU     (DX), X8 // X8 = current *T
  16  	MOVOU     (SI), X13 // X13 = *tagMask
  17  	MOVOU     bswapMask<>+0(SB), X15 // X15 = 16-byte reversal shuffle mask
  18  	MOVOU     gcmPoly<>+0(SB), X14 // X14 = reduction constant
  19  	SHLQ      $0x03, AX // pLen *= 8 (presumably bytes -> bits; confirm against caller)
  20  	SHLQ      $0x03, CX // dLen *= 8
  21  	MOVQ      AX, X0 // X0 low qword = pLen*8, high qword cleared
  22  	PINSRQ    $0x01, CX, X0 // X0 high qword = dLen*8
  23  	PXOR      X8, X0 // fold the length block into the running value
      	// Karatsuba 128x128 carry-less multiply of X0 by the entry at 224(DI).
  24  	MOVOU     224(DI), X8
  25  	MOVOU     240(DI), X10 // helper entry written next to 224(DI) by gcmAesInit
  26  	MOVOU     X8, X9
  27  	PCLMULQDQ $0x00, X0, X8 // X8 = low qword * low qword
  28  	PCLMULQDQ $0x11, X0, X9 // X9 = high qword * high qword
  29  	PSHUFD    $0x4e, X0, X11 // X11 = X0 with its 64-bit halves swapped
  30  	PXOR      X0, X11 // X11 = (hi ^ lo) in both halves
  31  	PCLMULQDQ $0x00, X11, X10 // X10 = product of the two (hi ^ lo) terms
  32  	PXOR      X8, X10
  33  	PXOR      X9, X10 // X10 = Karatsuba middle term
  34  	MOVOU     X10, X11
  35  	PSRLDQ    $0x08, X10 // upper half of the middle term ...
  36  	PSLLDQ    $0x08, X11 // ... and lower half, moved into place
  37  	PXOR      X10, X9 // X9 = high 128 bits of the product
  38  	PXOR      X11, X8 // X8 = low 128 bits of the product
      	// Reduction: two identical fold steps using the high qword of X14.
  39  	MOVOU     X14, X11
  40  	PCLMULQDQ $0x01, X8, X11 // X11 = (low qword of X8) * (high qword of gcmPoly)
  41  	PSHUFD    $0x4e, X8, X8 // swap halves of X8
  42  	PXOR      X11, X8
  43  	MOVOU     X14, X11
  44  	PCLMULQDQ $0x01, X8, X11
  45  	PSHUFD    $0x4e, X8, X8
  46  	PXOR      X11, X8
  47  	PXOR      X9, X8 // fold in the high 128 bits -> reduced 128-bit result
  48  	PSHUFB    X15, X8 // reverse the 16 bytes
  49  	PXOR      X13, X8 // apply the tag mask
  50  	MOVOU     X8, (DX) // *T = result
  51  	RET
  52  
  53  DATA bswapMask<>+0(SB)/8, $0x08090a0b0c0d0e0f // PSHUFB control: result bytes 0..7 take source bytes 15..8
  54  DATA bswapMask<>+8(SB)/8, $0x0001020304050607 // PSHUFB control: result bytes 8..15 take source bytes 7..0
  55  GLOBL bswapMask<>(SB), RODATA|NOPTR, $16 // file-local, read-only, 16 bytes; reverses all 16 bytes of an XMM register
  56  
  57  DATA gcmPoly<>+0(SB)/8, $0x0000000000000001 // low qword of the constant
  58  DATA gcmPoly<>+8(SB)/8, $0xc200000000000000 // high qword; the multiplier picked by the PCLMULQDQ $0x01 reduction steps
  59  GLOBL gcmPoly<>(SB), RODATA|NOPTR, $16 // file-local, read-only, 16 bytes; loaded into X14 by every routine in this file
  60  
  61  // func gcmAesInit(productTable *[256]byte, ks []uint32)
  62  // Requires: AES, PCLMULQDQ, SSE2, SSSE3
      //
      // Fills the 256-byte productTable from the expanded key schedule ks:
      //   1. runs the AES rounds over round key 0 (an all-zero block XORed with
      //      round key 0) to obtain the hash key, then byte-reverses it,
      //   2. shifts it left by one bit, XORing in gcmPoly when the top bit was set,
      //   3. stores that value at +224 and the XOR of its two 64-bit halves at +240,
      //   4. multiplies seven more times by the same value, storing each product and
      //      its half-XOR helper at +192/+208, +160/+176, ... down to +0/+16.
      // The number of AES rounds (10, 12 or 14) is selected from ks_len.
      // Leaf routine: no local frame, 32 bytes of arguments, nothing returned.
  63  TEXT ·gcmAesInit(SB), NOSPLIT, $0-32
  64  	MOVQ  productTable+0(FP), DI // DI = productTable (decremented by 32 per loop pass below)
  65  	MOVQ  ks_base+8(FP), SI // SI = round keys
  66  	MOVQ  ks_len+16(FP), DX
  67  	SHRQ  $0x02, DX // uint32 count / 4 = number of 16-byte round keys
  68  	DECQ  DX // DX = round keys - 1
  69  	MOVOU bswapMask<>+0(SB), X15
  70  	MOVOU gcmPoly<>+0(SB), X14
  71  
  72  	// Encrypt block 0, with the AES key to generate the hash key H
  73  	MOVOU  (SI), X0 // state = round key 0
  74  	MOVOU  16(SI), X11
  75  	AESENC X11, X0
  76  	MOVOU  32(SI), X11
  77  	AESENC X11, X0
  78  	MOVOU  48(SI), X11
  79  	AESENC X11, X0
  80  	MOVOU  64(SI), X11
  81  	AESENC X11, X0
  82  	MOVOU  80(SI), X11
  83  	AESENC X11, X0
  84  	MOVOU  96(SI), X11
  85  	AESENC X11, X0
  86  	MOVOU  112(SI), X11
  87  	AESENC X11, X0
  88  	MOVOU  128(SI), X11
  89  	AESENC X11, X0
  90  	MOVOU  144(SI), X11
  91  	AESENC X11, X0
  92  	MOVOU  160(SI), X11
  93  	CMPQ   DX, $0x0c // flags from this compare also feed the JE below
  94  	JB     initEncLast // DX < 12: round key 10 is the last one
  95  	AESENC X11, X0
  96  	MOVOU  176(SI), X11
  97  	AESENC X11, X0
  98  	MOVOU  192(SI), X11
  99  	JE     initEncLast // DX == 12: round key 12 is the last one
 100  	AESENC X11, X0
 101  	MOVOU  208(SI), X11
 102  	AESENC X11, X0
 103  	MOVOU  224(SI), X11 // otherwise round key 14 is the last one
 104  
 105  initEncLast:
 106  	AESENCLAST X11, X0 // final round with whichever key was loaded last
 107  	PSHUFB     X15, X0 // reverse the 16 bytes
 108  
 109  	// H * 2
 110  	PSHUFD $0xff, X0, X11 // broadcast the top dword
 111  	MOVOU  X0, X12
 112  	PSRAL  $0x1f, X11 // all-ones if the top bit of X0 was set, else zero
 113  	PAND   X14, X11 // gcmPoly or zero
 114  	PSRLL  $0x1f, X12 // top bit of every dword moved to bit 0
 115  	PSLLDQ $0x04, X12 // carry each of those bits into the next higher dword
 116  	PSLLL  $0x01, X0 // shift every dword left by one
 117  	PXOR   X11, X0 // conditional XOR with gcmPoly
 118  	PXOR   X12, X0 // add the inter-dword carries -> 128-bit shift by one
 119  
 120  	// Karatsuba pre-computations
 121  	MOVOU  X0, 224(DI) // last table pair: the value itself ...
 122  	PSHUFD $0x4e, X0, X1 // (swap 64-bit halves)
 123  	PXOR   X0, X1
 124  	MOVOU  X1, 240(DI) // ... and hi^lo of it
 125  	MOVOU  X0, X2 // X2 = running power, X0 stays the fixed multiplier
 126  	MOVOU  X1, X3 // X3 = hi^lo of the running power
 127  
 128  	// Now prepare powers of H and pre-computations for them
 129  	MOVQ $0x00000007, AX // seven more table pairs to produce
 130  
 131  initLoop:
      	// X2 = X2 * X0 (Karatsuba multiply followed by the two-step reduction).
 132  	MOVOU     X2, X11
 133  	MOVOU     X2, X12
 134  	MOVOU     X3, X13
 135  	PCLMULQDQ $0x00, X0, X11 // low * low
 136  	PCLMULQDQ $0x11, X0, X12 // high * high
 137  	PCLMULQDQ $0x00, X1, X13 // (hi^lo) * (hi^lo)
 138  	PXOR      X11, X13
 139  	PXOR      X12, X13 // middle term
 140  	MOVOU     X13, X4
 141  	PSLLDQ    $0x08, X4
 142  	PSRLDQ    $0x08, X13
 143  	PXOR      X4, X11 // X11 = low 128 bits of the product
 144  	PXOR      X13, X12 // X12 = high 128 bits of the product
 145  	MOVOU     X14, X2
 146  	PCLMULQDQ $0x01, X11, X2 // first reduction fold
 147  	PSHUFD    $0x4e, X11, X11
 148  	PXOR      X2, X11
 149  	MOVOU     X14, X2
 150  	PCLMULQDQ $0x01, X11, X2 // second reduction fold
 151  	PSHUFD    $0x4e, X11, X11
 152  	PXOR      X11, X2
 153  	PXOR      X12, X2 // X2 = reduced product = next power
 154  	MOVOU     X2, 192(DI) // store it one pair below the previous one
 155  	PSHUFD    $0x4e, X2, X3
 156  	PXOR      X2, X3
 157  	MOVOU     X3, 208(DI) // and its hi^lo helper
 158  	DECQ      AX // sets the flags tested by JNE
 159  	LEAQ      -32(DI), DI // step down one table pair; LEAQ leaves flags untouched
 160  	JNE       initLoop
 161  	RET
 162  
 163  // func gcmAesData(productTable *[256]byte, data []byte, T *[16]byte)
 164  // Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // Hashes data with the multipliers in productTable and stores the 16-byte
      // result in *T. The accumulator starts at zero; *T is only written, never read.
      // Paths:
      //   - empty input: *T is set to zero,
      //   - exactly 13 bytes: loaded with three inserts and hashed as one block,
      //   - 128 bytes or more: eight blocks per pass, one reduction per pass,
      //   - remaining whole blocks: one multiply and reduction per block,
      //   - 1..15 trailing bytes: gathered byte by byte, zero padded, hashed as a block.
      // Registers: DI = productTable, SI = data cursor, DX = bytes left, CX = T,
      // X8 = accumulator, X14 = gcmPoly, X15 = bswapMask.
      // Leaf routine: no local frame, 40 bytes of arguments, nothing returned.
 165  TEXT ·gcmAesData(SB), NOSPLIT, $0-40
 166  	MOVQ  productTable+0(FP), DI
 167  	MOVQ  data_base+8(FP), SI
 168  	MOVQ  data_len+16(FP), DX
 169  	MOVQ  T+32(FP), CX
 170  	PXOR  X8, X8 // accumulator = 0
 171  	MOVOU bswapMask<>+0(SB), X15
 172  	MOVOU gcmPoly<>+0(SB), X14
 173  	TESTQ DX, DX
 174  	JEQ   dataBail // nothing to hash
 175  	CMPQ  DX, $0x0d
 176  	JE    dataTLS // dedicated path for 13-byte input
 177  	CMPQ  DX, $0x80
 178  	JB    startSinglesLoop
 179  	JMP   dataOctaLoop
 180  
 181  dataTLS:
      	// Load exactly 13 bytes (8 + 4 + 1) into a zeroed register.
 182  	MOVOU  224(DI), X12 // multiplier pair used by dataMul
 183  	MOVOU  240(DI), X13
 184  	PXOR   X0, X0
 185  	MOVQ   (SI), X0 // bytes 0..7
 186  	PINSRD $0x02, 8(SI), X0 // bytes 8..11
 187  	PINSRB $0x0c, 12(SI), X0 // byte 12
 188  	XORQ   DX, DX // no bytes left once this block is hashed
 189  	JMP    dataMul
 190  
 191  dataOctaLoop:
      	// Eight blocks per pass: block k is multiplied by the table pair at 32*k(DI),
      	// the partial products are summed in X8 (low), X9 (high) and X10 (middle),
      	// and a single reduction follows.
 192  	CMPQ      DX, $0x80
 193  	JB        startSinglesLoop
 194  	SUBQ      $0x80, DX
 195  	MOVOU     (SI), X0
 196  	MOVOU     16(SI), X1
 197  	MOVOU     32(SI), X2
 198  	MOVOU     48(SI), X3
 199  	MOVOU     64(SI), X4
 200  	MOVOU     80(SI), X5
 201  	MOVOU     96(SI), X6
 202  	MOVOU     112(SI), X7
 203  	LEAQ      128(SI), SI
 204  	PSHUFB    X15, X0 // byte-reverse all eight blocks
 205  	PSHUFB    X15, X1
 206  	PSHUFB    X15, X2
 207  	PSHUFB    X15, X3
 208  	PSHUFB    X15, X4
 209  	PSHUFB    X15, X5
 210  	PSHUFB    X15, X6
 211  	PSHUFB    X15, X7
 212  	PXOR      X8, X0 // fold the accumulator into the first block
 213  	MOVOU     (DI), X8 // block 0 with table pair 0
 214  	MOVOU     16(DI), X10
 215  	MOVOU     X8, X9
 216  	PSHUFD    $0x4e, X0, X12
 217  	PXOR      X0, X12
 218  	PCLMULQDQ $0x00, X0, X8
 219  	PCLMULQDQ $0x11, X0, X9
 220  	PCLMULQDQ $0x00, X12, X10
 221  	MOVOU     32(DI), X12 // block 1 with table pair 1
 222  	MOVOU     X12, X13
 223  	PCLMULQDQ $0x00, X1, X12
 224  	PXOR      X12, X8
 225  	PCLMULQDQ $0x11, X1, X13
 226  	PXOR      X13, X9
 227  	PSHUFD    $0x4e, X1, X12
 228  	PXOR      X12, X1
 229  	MOVOU     48(DI), X12
 230  	PCLMULQDQ $0x00, X1, X12
 231  	PXOR      X12, X10
 232  	MOVOU     64(DI), X12 // block 2 with table pair 2
 233  	MOVOU     X12, X13
 234  	PCLMULQDQ $0x00, X2, X12
 235  	PXOR      X12, X8
 236  	PCLMULQDQ $0x11, X2, X13
 237  	PXOR      X13, X9
 238  	PSHUFD    $0x4e, X2, X12
 239  	PXOR      X12, X2
 240  	MOVOU     80(DI), X12
 241  	PCLMULQDQ $0x00, X2, X12
 242  	PXOR      X12, X10
 243  	MOVOU     96(DI), X12 // block 3 with table pair 3
 244  	MOVOU     X12, X13
 245  	PCLMULQDQ $0x00, X3, X12
 246  	PXOR      X12, X8
 247  	PCLMULQDQ $0x11, X3, X13
 248  	PXOR      X13, X9
 249  	PSHUFD    $0x4e, X3, X12
 250  	PXOR      X12, X3
 251  	MOVOU     112(DI), X12
 252  	PCLMULQDQ $0x00, X3, X12
 253  	PXOR      X12, X10
 254  	MOVOU     128(DI), X12 // block 4 with table pair 4
 255  	MOVOU     X12, X13
 256  	PCLMULQDQ $0x00, X4, X12
 257  	PXOR      X12, X8
 258  	PCLMULQDQ $0x11, X4, X13
 259  	PXOR      X13, X9
 260  	PSHUFD    $0x4e, X4, X12
 261  	PXOR      X12, X4
 262  	MOVOU     144(DI), X12
 263  	PCLMULQDQ $0x00, X4, X12
 264  	PXOR      X12, X10
 265  	MOVOU     160(DI), X12 // block 5 with table pair 5
 266  	MOVOU     X12, X13
 267  	PCLMULQDQ $0x00, X5, X12
 268  	PXOR      X12, X8
 269  	PCLMULQDQ $0x11, X5, X13
 270  	PXOR      X13, X9
 271  	PSHUFD    $0x4e, X5, X12
 272  	PXOR      X12, X5
 273  	MOVOU     176(DI), X12
 274  	PCLMULQDQ $0x00, X5, X12
 275  	PXOR      X12, X10
 276  	MOVOU     192(DI), X12 // block 6 with table pair 6
 277  	MOVOU     X12, X13
 278  	PCLMULQDQ $0x00, X6, X12
 279  	PXOR      X12, X8
 280  	PCLMULQDQ $0x11, X6, X13
 281  	PXOR      X13, X9
 282  	PSHUFD    $0x4e, X6, X12
 283  	PXOR      X12, X6
 284  	MOVOU     208(DI), X12
 285  	PCLMULQDQ $0x00, X6, X12
 286  	PXOR      X12, X10
 287  	MOVOU     224(DI), X12 // block 7 with table pair 7
 288  	MOVOU     X12, X13
 289  	PCLMULQDQ $0x00, X7, X12
 290  	PXOR      X12, X8
 291  	PCLMULQDQ $0x11, X7, X13
 292  	PXOR      X13, X9
 293  	PSHUFD    $0x4e, X7, X12
 294  	PXOR      X12, X7
 295  	MOVOU     240(DI), X12
 296  	PCLMULQDQ $0x00, X7, X12
 297  	PXOR      X12, X10
 298  	PXOR      X8, X10 // finish the Karatsuba middle term
 299  	PXOR      X9, X10
 300  	MOVOU     X10, X11
 301  	PSRLDQ    $0x08, X10
 302  	PSLLDQ    $0x08, X11
 303  	PXOR      X10, X9 // X9 = high 128 bits
 304  	PXOR      X11, X8 // X8 = low 128 bits
 305  	MOVOU     X14, X11 // two-step reduction
 306  	PCLMULQDQ $0x01, X8, X11
 307  	PSHUFD    $0x4e, X8, X8
 308  	PXOR      X11, X8
 309  	MOVOU     X14, X11
 310  	PCLMULQDQ $0x01, X8, X11
 311  	PSHUFD    $0x4e, X8, X8
 312  	PXOR      X11, X8
 313  	PXOR      X9, X8 // X8 = updated accumulator
 314  	JMP       dataOctaLoop
 315  
 316  startSinglesLoop:
 317  	MOVOU 224(DI), X12 // multiplier pair for single blocks, kept in X12/X13
 318  	MOVOU 240(DI), X13
 319  
 320  dataSinglesLoop:
 321  	CMPQ  DX, $0x10
 322  	JB    dataEnd
 323  	SUBQ  $0x10, DX
 324  	MOVOU (SI), X0
 325  
 326  dataMul:
      	// Shared by the whole-block loop, the 13-byte path and the partial tail:
      	// accumulator = (accumulator ^ reverse(X0)) * multiplier, reduced.
 327  	PSHUFB    X15, X0
 328  	PXOR      X8, X0
 329  	MOVOU     X12, X8
 330  	MOVOU     X13, X10
 331  	MOVOU     X12, X9
 332  	PSHUFD    $0x4e, X0, X11
 333  	PXOR      X0, X11
 334  	PCLMULQDQ $0x00, X0, X8
 335  	PCLMULQDQ $0x11, X0, X9
 336  	PCLMULQDQ $0x00, X11, X10
 337  	PXOR      X8, X10
 338  	PXOR      X9, X10
 339  	MOVOU     X10, X11
 340  	PSRLDQ    $0x08, X10
 341  	PSLLDQ    $0x08, X11
 342  	PXOR      X10, X9
 343  	PXOR      X11, X8
 344  	MOVOU     X14, X11
 345  	PCLMULQDQ $0x01, X8, X11
 346  	PSHUFD    $0x4e, X8, X8
 347  	PXOR      X11, X8
 348  	MOVOU     X14, X11
 349  	PCLMULQDQ $0x01, X8, X11
 350  	PSHUFD    $0x4e, X8, X8
 351  	PXOR      X11, X8
 352  	PXOR      X9, X8
 353  	LEAQ      16(SI), SI
 354  	JMP       dataSinglesLoop // with DX == 0 this falls through dataEnd to dataBail
 355  
 356  dataEnd:
 357  	TESTQ DX, DX
 358  	JEQ   dataBail
 359  	PXOR  X0, X0 // zero padding for the partial block
 360  	LEAQ  -1(SI)(DX*1), SI // SI = address of the last remaining byte
 361  
 362  dataLoadLoop:
      	// Walk backwards so the first remaining byte ends up in byte 0 of X0.
 363  	PSLLDQ $0x01, X0
 364  	PINSRB $0x00, (SI), X0
 365  	LEAQ   -1(SI), SI // LEAQ leaves the flags from DECQ's predecessor alone; DECQ below sets them
 366  	DECQ   DX
 367  	JNE    dataLoadLoop
 368  	JMP    dataMul // DX is now 0, so the loop after dataMul exits
 369  
 370  dataBail:
 371  	MOVOU X8, (CX) // *T = accumulator
 372  	RET
 373  
 374  // func gcmAesEnc(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
 375  // Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // Encrypts src_len bytes of src into dst in counter mode and folds every
      // ciphertext block into the hash accumulator loaded from *T; the updated
      // accumulator is written back to *T. *ctr is read but never written, and the
      // first keystream block is generated from the counter value after *ctr.
      // dst_len is not consulted.
      //
      // Frame (256 bytes):
      //   0..127(SP)   eight byte-reversed ciphertext blocks waiting to be hashed
      //   128..255(SP) eight counter blocks, already XORed with round key 0
      //
      // Registers:
      //   DI = productTable   DX = dst cursor   SI = src cursor   R9 = bytes left
      //   CX = ctr            R8 = T            AX = round keys
      //   R13 = ks_len/4 - 1 (selects 10, 12 or 14 rounds)
      //   R10 = counter word in host byte order, R12 = last word of round key 0
      //         in host byte order, R11 = scratch
      //   X8/X9/X10 = low/high/middle hash accumulators, X14 = gcmPoly,
      //   X15 = bswapMask, X0..X7 = blocks in flight, X11..X13 = scratch
 376  TEXT ·gcmAesEnc(SB), $256-96
 377  	MOVQ   productTable+0(FP), DI
 378  	MOVQ   dst_base+8(FP), DX
 379  	MOVQ   src_base+32(FP), SI
 380  	MOVQ   src_len+40(FP), R9
 381  	MOVQ   ctr+56(FP), CX
 382  	MOVQ   T+64(FP), R8
 383  	MOVQ   ks_base+72(FP), AX
 384  	MOVQ   ks_len+80(FP), R13
 385  	SHRQ   $0x02, R13 // uint32 count / 4 = number of 16-byte round keys
 386  	DECQ   R13 // R13 = round keys - 1
 387  	MOVOU  bswapMask<>+0(SB), X15
 388  	MOVOU  gcmPoly<>+0(SB), X14
 389  	MOVOU  (R8), X8 // accumulator = *T
 390  	PXOR   X9, X9
 391  	PXOR   X10, X10
 392  	MOVOU  (CX), X0 // X0 = *ctr
 393  	MOVL   12(CX), R10 // last dword of the counter block
 394  	MOVOU  (AX), X11 // X11 = round key 0
 395  	MOVL   12(AX), R12 // last dword of round key 0
 396  	BSWAPL R10 // both to host byte order
 397  	BSWAPL R12
 398  	PXOR   X0, X11 // X11 = *ctr ^ round key 0 (template for all counter slots)
 399  	MOVOU  X11, 128(SP) // counter slot 0
      	// Counter step, repeated many times below: R10++, then the last dword of a
      	// slot is rewritten with bswap(R10 ^ R12), i.e. the new counter ^ round key 0.
 400  	ADDL   $0x01, R10
 401  	MOVL   R10, R11
 402  	XORL   R12, R11
 403  	BSWAPL R11
 404  	MOVL   R11, 140(SP) // slot 0 now holds the first counter to encrypt
 405  	CMPQ   R9, $0x80
 406  	JB     gcmAesEncSingles // fewer than 8 blocks: skip the 8-way code
 407  	SUBQ   $0x80, R9
 408  
 409  	// We have at least 8 blocks to encrypt, prepare the rest of the counters
 410  	MOVOU  X11, 144(SP) // slot 1
 411  	ADDL   $0x01, R10
 412  	MOVL   R10, R11
 413  	XORL   R12, R11
 414  	BSWAPL R11
 415  	MOVL   R11, 156(SP)
 416  	MOVOU  X11, 160(SP) // slot 2
 417  	ADDL   $0x01, R10
 418  	MOVL   R10, R11
 419  	XORL   R12, R11
 420  	BSWAPL R11
 421  	MOVL   R11, 172(SP)
 422  	MOVOU  X11, 176(SP) // slot 3
 423  	ADDL   $0x01, R10
 424  	MOVL   R10, R11
 425  	XORL   R12, R11
 426  	BSWAPL R11
 427  	MOVL   R11, 188(SP)
 428  	MOVOU  X11, 192(SP) // slot 4
 429  	ADDL   $0x01, R10
 430  	MOVL   R10, R11
 431  	XORL   R12, R11
 432  	BSWAPL R11
 433  	MOVL   R11, 204(SP)
 434  	MOVOU  X11, 208(SP) // slot 5
 435  	ADDL   $0x01, R10
 436  	MOVL   R10, R11
 437  	XORL   R12, R11
 438  	BSWAPL R11
 439  	MOVL   R11, 220(SP)
 440  	MOVOU  X11, 224(SP) // slot 6
 441  	ADDL   $0x01, R10
 442  	MOVL   R10, R11
 443  	XORL   R12, R11
 444  	BSWAPL R11
 445  	MOVL   R11, 236(SP)
 446  	MOVOU  X11, 240(SP) // slot 7
 447  	ADDL   $0x01, R10
 448  	MOVL   R10, R11
 449  	XORL   R12, R11
 450  	BSWAPL R11
 451  	MOVL   R11, 252(SP)
      	// First batch: load the eight counter blocks, then run the AES rounds while
      	// refreshing the counter slots for the batch after this one.
 452  	MOVOU  128(SP), X0
 453  	MOVOU  144(SP), X1
 454  	MOVOU  160(SP), X2
 455  	MOVOU  176(SP), X3
 456  	MOVOU  192(SP), X4
 457  	MOVOU  208(SP), X5
 458  	MOVOU  224(SP), X6
 459  	MOVOU  240(SP), X7
 460  	MOVOU  16(AX), X11 // round 1
 461  	AESENC X11, X0
 462  	AESENC X11, X1
 463  	AESENC X11, X2
 464  	AESENC X11, X3
 465  	AESENC X11, X4
 466  	AESENC X11, X5
 467  	AESENC X11, X6
 468  	AESENC X11, X7
 469  	ADDL   $0x01, R10
 470  	MOVL   R10, R11
 471  	XORL   R12, R11
 472  	BSWAPL R11
 473  	MOVL   R11, 140(SP) // next counter for slot 0
 474  	MOVOU  32(AX), X11 // round 2
 475  	AESENC X11, X0
 476  	AESENC X11, X1
 477  	AESENC X11, X2
 478  	AESENC X11, X3
 479  	AESENC X11, X4
 480  	AESENC X11, X5
 481  	AESENC X11, X6
 482  	AESENC X11, X7
 483  	ADDL   $0x01, R10
 484  	MOVL   R10, R11
 485  	XORL   R12, R11
 486  	BSWAPL R11
 487  	MOVL   R11, 156(SP) // next counter for slot 1
 488  	MOVOU  48(AX), X11 // round 3
 489  	AESENC X11, X0
 490  	AESENC X11, X1
 491  	AESENC X11, X2
 492  	AESENC X11, X3
 493  	AESENC X11, X4
 494  	AESENC X11, X5
 495  	AESENC X11, X6
 496  	AESENC X11, X7
 497  	ADDL   $0x01, R10
 498  	MOVL   R10, R11
 499  	XORL   R12, R11
 500  	BSWAPL R11
 501  	MOVL   R11, 172(SP) // next counter for slot 2
 502  	MOVOU  64(AX), X11 // round 4
 503  	AESENC X11, X0
 504  	AESENC X11, X1
 505  	AESENC X11, X2
 506  	AESENC X11, X3
 507  	AESENC X11, X4
 508  	AESENC X11, X5
 509  	AESENC X11, X6
 510  	AESENC X11, X7
 511  	ADDL   $0x01, R10
 512  	MOVL   R10, R11
 513  	XORL   R12, R11
 514  	BSWAPL R11
 515  	MOVL   R11, 188(SP) // next counter for slot 3
 516  	MOVOU  80(AX), X11 // round 5
 517  	AESENC X11, X0
 518  	AESENC X11, X1
 519  	AESENC X11, X2
 520  	AESENC X11, X3
 521  	AESENC X11, X4
 522  	AESENC X11, X5
 523  	AESENC X11, X6
 524  	AESENC X11, X7
 525  	ADDL   $0x01, R10
 526  	MOVL   R10, R11
 527  	XORL   R12, R11
 528  	BSWAPL R11
 529  	MOVL   R11, 204(SP) // next counter for slot 4
 530  	MOVOU  96(AX), X11 // round 6
 531  	AESENC X11, X0
 532  	AESENC X11, X1
 533  	AESENC X11, X2
 534  	AESENC X11, X3
 535  	AESENC X11, X4
 536  	AESENC X11, X5
 537  	AESENC X11, X6
 538  	AESENC X11, X7
 539  	ADDL   $0x01, R10
 540  	MOVL   R10, R11
 541  	XORL   R12, R11
 542  	BSWAPL R11
 543  	MOVL   R11, 220(SP) // next counter for slot 5
 544  	MOVOU  112(AX), X11 // round 7
 545  	AESENC X11, X0
 546  	AESENC X11, X1
 547  	AESENC X11, X2
 548  	AESENC X11, X3
 549  	AESENC X11, X4
 550  	AESENC X11, X5
 551  	AESENC X11, X6
 552  	AESENC X11, X7
 553  	ADDL   $0x01, R10
 554  	MOVL   R10, R11
 555  	XORL   R12, R11
 556  	BSWAPL R11
 557  	MOVL   R11, 236(SP) // next counter for slot 6
 558  	MOVOU  128(AX), X11 // round 8
 559  	AESENC X11, X0
 560  	AESENC X11, X1
 561  	AESENC X11, X2
 562  	AESENC X11, X3
 563  	AESENC X11, X4
 564  	AESENC X11, X5
 565  	AESENC X11, X6
 566  	AESENC X11, X7
 567  	ADDL   $0x01, R10
 568  	MOVL   R10, R11
 569  	XORL   R12, R11
 570  	BSWAPL R11
 571  	MOVL   R11, 252(SP) // next counter for slot 7
 572  	MOVOU  144(AX), X11 // round 9
 573  	AESENC X11, X0
 574  	AESENC X11, X1
 575  	AESENC X11, X2
 576  	AESENC X11, X3
 577  	AESENC X11, X4
 578  	AESENC X11, X5
 579  	AESENC X11, X6
 580  	AESENC X11, X7
 581  	MOVOU  160(AX), X11
 582  	CMPQ   R13, $0x0c // flags from this compare also feed the JE below
 583  	JB     encLast1 // R13 < 12: round key 10 is the last one
 584  	AESENC X11, X0
 585  	AESENC X11, X1
 586  	AESENC X11, X2
 587  	AESENC X11, X3
 588  	AESENC X11, X4
 589  	AESENC X11, X5
 590  	AESENC X11, X6
 591  	AESENC X11, X7
 592  	MOVOU  176(AX), X11
 593  	AESENC X11, X0
 594  	AESENC X11, X1
 595  	AESENC X11, X2
 596  	AESENC X11, X3
 597  	AESENC X11, X4
 598  	AESENC X11, X5
 599  	AESENC X11, X6
 600  	AESENC X11, X7
 601  	MOVOU  192(AX), X11
 602  	JE     encLast1 // R13 == 12: round key 12 is the last one
 603  	AESENC X11, X0
 604  	AESENC X11, X1
 605  	AESENC X11, X2
 606  	AESENC X11, X3
 607  	AESENC X11, X4
 608  	AESENC X11, X5
 609  	AESENC X11, X6
 610  	AESENC X11, X7
 611  	MOVOU  208(AX), X11
 612  	AESENC X11, X0
 613  	AESENC X11, X1
 614  	AESENC X11, X2
 615  	AESENC X11, X3
 616  	AESENC X11, X4
 617  	AESENC X11, X5
 618  	AESENC X11, X6
 619  	AESENC X11, X7
 620  	MOVOU  224(AX), X11 // otherwise round key 14 is the last one
 621  
 622  encLast1:
      	// Final round, XOR with the plaintext, write ciphertext, then keep a
      	// byte-reversed copy of each ciphertext block on the stack for hashing.
 623  	AESENCLAST X11, X0
 624  	AESENCLAST X11, X1
 625  	AESENCLAST X11, X2
 626  	AESENCLAST X11, X3
 627  	AESENCLAST X11, X4
 628  	AESENCLAST X11, X5
 629  	AESENCLAST X11, X6
 630  	AESENCLAST X11, X7
 631  	MOVOU      (SI), X11
 632  	PXOR       X11, X0
 633  	MOVOU      16(SI), X11
 634  	PXOR       X11, X1
 635  	MOVOU      32(SI), X11
 636  	PXOR       X11, X2
 637  	MOVOU      48(SI), X11
 638  	PXOR       X11, X3
 639  	MOVOU      64(SI), X11
 640  	PXOR       X11, X4
 641  	MOVOU      80(SI), X11
 642  	PXOR       X11, X5
 643  	MOVOU      96(SI), X11
 644  	PXOR       X11, X6
 645  	MOVOU      112(SI), X11
 646  	PXOR       X11, X7
 647  	MOVOU      X0, (DX)
 648  	PSHUFB     X15, X0
 649  	PXOR       X8, X0 // the accumulator is folded into the first block of the batch
 650  	MOVOU      X1, 16(DX)
 651  	PSHUFB     X15, X1
 652  	MOVOU      X2, 32(DX)
 653  	PSHUFB     X15, X2
 654  	MOVOU      X3, 48(DX)
 655  	PSHUFB     X15, X3
 656  	MOVOU      X4, 64(DX)
 657  	PSHUFB     X15, X4
 658  	MOVOU      X5, 80(DX)
 659  	PSHUFB     X15, X5
 660  	MOVOU      X6, 96(DX)
 661  	PSHUFB     X15, X6
 662  	MOVOU      X7, 112(DX)
 663  	PSHUFB     X15, X7
 664  	MOVOU      X0, (SP)
 665  	MOVOU      X1, 16(SP)
 666  	MOVOU      X2, 32(SP)
 667  	MOVOU      X3, 48(SP)
 668  	MOVOU      X4, 64(SP)
 669  	MOVOU      X5, 80(SP)
 670  	MOVOU      X6, 96(SP)
 671  	MOVOU      X7, 112(SP)
 672  	LEAQ       128(SI), SI
 673  	LEAQ       128(DX), DX
 674  
 675  gcmAesEncOctetsLoop:
      	// Steady state: encrypt the next eight counter blocks while hashing the
      	// eight ciphertext blocks saved at 0..127(SP) by the previous batch.
      	// Saved block k is multiplied by the table pair at 32*k(DI).
 676  	CMPQ      R9, $0x80
 677  	JB        gcmAesEncOctetsEnd
 678  	SUBQ      $0x80, R9
 679  	MOVOU     128(SP), X0
 680  	MOVOU     144(SP), X1
 681  	MOVOU     160(SP), X2
 682  	MOVOU     176(SP), X3
 683  	MOVOU     192(SP), X4
 684  	MOVOU     208(SP), X5
 685  	MOVOU     224(SP), X6
 686  	MOVOU     240(SP), X7
 687  	MOVOU     (SP), X11 // saved block 0 (already includes the old accumulator)
 688  	PSHUFD    $0x4e, X11, X12
 689  	PXOR      X11, X12
 690  	MOVOU     (DI), X8
 691  	MOVOU     16(DI), X10
 692  	MOVOU     X8, X9
 693  	PCLMULQDQ $0x00, X12, X10 // middle
 694  	PCLMULQDQ $0x00, X11, X8 // low
 695  	PCLMULQDQ $0x11, X11, X9 // high
 696  	MOVOU     16(AX), X11 // AES round 1, interleaved with hashing saved block 1
 697  	AESENC    X11, X0
 698  	AESENC    X11, X1
 699  	AESENC    X11, X2
 700  	AESENC    X11, X3
 701  	MOVOU     32(DI), X12
 702  	MOVOU     X12, X13
 703  	AESENC    X11, X4
 704  	AESENC    X11, X5
 705  	AESENC    X11, X6
 706  	AESENC    X11, X7
 707  	MOVOU     16(SP), X11
 708  	PCLMULQDQ $0x00, X11, X12
 709  	PXOR      X12, X8
 710  	PSHUFD    $0x4e, X11, X12
 711  	PCLMULQDQ $0x11, X11, X13
 712  	PXOR      X12, X11
 713  	PXOR      X13, X9
 714  	MOVOU     48(DI), X13
 715  	PCLMULQDQ $0x00, X13, X11
 716  	PXOR      X11, X10
 717  	ADDL      $0x01, R10
 718  	MOVL      R10, R11
 719  	XORL      R12, R11
 720  	BSWAPL    R11
 721  	MOVL      R11, 140(SP) // next counter for slot 0
 722  	MOVOU     32(AX), X11 // AES round 2, saved block 2
 723  	AESENC    X11, X0
 724  	AESENC    X11, X1
 725  	AESENC    X11, X2
 726  	AESENC    X11, X3
 727  	MOVOU     64(DI), X12
 728  	MOVOU     X12, X13
 729  	AESENC    X11, X4
 730  	AESENC    X11, X5
 731  	AESENC    X11, X6
 732  	AESENC    X11, X7
 733  	MOVOU     32(SP), X11
 734  	PCLMULQDQ $0x00, X11, X12
 735  	PXOR      X12, X8
 736  	PSHUFD    $0x4e, X11, X12
 737  	PCLMULQDQ $0x11, X11, X13
 738  	PXOR      X12, X11
 739  	PXOR      X13, X9
 740  	MOVOU     80(DI), X13
 741  	PCLMULQDQ $0x00, X13, X11
 742  	PXOR      X11, X10
 743  	ADDL      $0x01, R10
 744  	MOVL      R10, R11
 745  	XORL      R12, R11
 746  	BSWAPL    R11
 747  	MOVL      R11, 156(SP) // next counter for slot 1
 748  	MOVOU     48(AX), X11 // AES round 3, saved block 3
 749  	AESENC    X11, X0
 750  	AESENC    X11, X1
 751  	AESENC    X11, X2
 752  	AESENC    X11, X3
 753  	MOVOU     96(DI), X12
 754  	MOVOU     X12, X13
 755  	AESENC    X11, X4
 756  	AESENC    X11, X5
 757  	AESENC    X11, X6
 758  	AESENC    X11, X7
 759  	MOVOU     48(SP), X11
 760  	PCLMULQDQ $0x00, X11, X12
 761  	PXOR      X12, X8
 762  	PSHUFD    $0x4e, X11, X12
 763  	PCLMULQDQ $0x11, X11, X13
 764  	PXOR      X12, X11
 765  	PXOR      X13, X9
 766  	MOVOU     112(DI), X13
 767  	PCLMULQDQ $0x00, X13, X11
 768  	PXOR      X11, X10
 769  	ADDL      $0x01, R10
 770  	MOVL      R10, R11
 771  	XORL      R12, R11
 772  	BSWAPL    R11
 773  	MOVL      R11, 172(SP) // next counter for slot 2
 774  	MOVOU     64(AX), X11 // AES round 4, saved block 4
 775  	AESENC    X11, X0
 776  	AESENC    X11, X1
 777  	AESENC    X11, X2
 778  	AESENC    X11, X3
 779  	MOVOU     128(DI), X12
 780  	MOVOU     X12, X13
 781  	AESENC    X11, X4
 782  	AESENC    X11, X5
 783  	AESENC    X11, X6
 784  	AESENC    X11, X7
 785  	MOVOU     64(SP), X11
 786  	PCLMULQDQ $0x00, X11, X12
 787  	PXOR      X12, X8
 788  	PSHUFD    $0x4e, X11, X12
 789  	PCLMULQDQ $0x11, X11, X13
 790  	PXOR      X12, X11
 791  	PXOR      X13, X9
 792  	MOVOU     144(DI), X13
 793  	PCLMULQDQ $0x00, X13, X11
 794  	PXOR      X11, X10
 795  	ADDL      $0x01, R10
 796  	MOVL      R10, R11
 797  	XORL      R12, R11
 798  	BSWAPL    R11
 799  	MOVL      R11, 188(SP) // next counter for slot 3
 800  	MOVOU     80(AX), X11 // AES round 5, saved block 5
 801  	AESENC    X11, X0
 802  	AESENC    X11, X1
 803  	AESENC    X11, X2
 804  	AESENC    X11, X3
 805  	MOVOU     160(DI), X12
 806  	MOVOU     X12, X13
 807  	AESENC    X11, X4
 808  	AESENC    X11, X5
 809  	AESENC    X11, X6
 810  	AESENC    X11, X7
 811  	MOVOU     80(SP), X11
 812  	PCLMULQDQ $0x00, X11, X12
 813  	PXOR      X12, X8
 814  	PSHUFD    $0x4e, X11, X12
 815  	PCLMULQDQ $0x11, X11, X13
 816  	PXOR      X12, X11
 817  	PXOR      X13, X9
 818  	MOVOU     176(DI), X13
 819  	PCLMULQDQ $0x00, X13, X11
 820  	PXOR      X11, X10
 821  	ADDL      $0x01, R10
 822  	MOVL      R10, R11
 823  	XORL      R12, R11
 824  	BSWAPL    R11
 825  	MOVL      R11, 204(SP) // next counter for slot 4
 826  	MOVOU     96(AX), X11 // AES round 6, saved block 6
 827  	AESENC    X11, X0
 828  	AESENC    X11, X1
 829  	AESENC    X11, X2
 830  	AESENC    X11, X3
 831  	MOVOU     192(DI), X12
 832  	MOVOU     X12, X13
 833  	AESENC    X11, X4
 834  	AESENC    X11, X5
 835  	AESENC    X11, X6
 836  	AESENC    X11, X7
 837  	MOVOU     96(SP), X11
 838  	PCLMULQDQ $0x00, X11, X12
 839  	PXOR      X12, X8
 840  	PSHUFD    $0x4e, X11, X12
 841  	PCLMULQDQ $0x11, X11, X13
 842  	PXOR      X12, X11
 843  	PXOR      X13, X9
 844  	MOVOU     208(DI), X13
 845  	PCLMULQDQ $0x00, X13, X11
 846  	PXOR      X11, X10
 847  	ADDL      $0x01, R10
 848  	MOVL      R10, R11
 849  	XORL      R12, R11
 850  	BSWAPL    R11
 851  	MOVL      R11, 220(SP) // next counter for slot 5
 852  	MOVOU     112(AX), X11 // AES round 7, saved block 7
 853  	AESENC    X11, X0
 854  	AESENC    X11, X1
 855  	AESENC    X11, X2
 856  	AESENC    X11, X3
 857  	MOVOU     224(DI), X12
 858  	MOVOU     X12, X13
 859  	AESENC    X11, X4
 860  	AESENC    X11, X5
 861  	AESENC    X11, X6
 862  	AESENC    X11, X7
 863  	MOVOU     112(SP), X11
 864  	PCLMULQDQ $0x00, X11, X12
 865  	PXOR      X12, X8
 866  	PSHUFD    $0x4e, X11, X12
 867  	PCLMULQDQ $0x11, X11, X13
 868  	PXOR      X12, X11
 869  	PXOR      X13, X9
 870  	MOVOU     240(DI), X13
 871  	PCLMULQDQ $0x00, X13, X11
 872  	PXOR      X11, X10
 873  	ADDL      $0x01, R10
 874  	MOVL      R10, R11
 875  	XORL      R12, R11
 876  	BSWAPL    R11
 877  	MOVL      R11, 236(SP) // next counter for slot 6
 878  	MOVOU     128(AX), X11 // AES round 8
 879  	AESENC    X11, X0
 880  	AESENC    X11, X1
 881  	AESENC    X11, X2
 882  	AESENC    X11, X3
 883  	AESENC    X11, X4
 884  	AESENC    X11, X5
 885  	AESENC    X11, X6
 886  	AESENC    X11, X7
 887  	ADDL      $0x01, R10
 888  	MOVL      R10, R11
 889  	XORL      R12, R11
 890  	BSWAPL    R11
 891  	MOVL      R11, 252(SP) // next counter for slot 7
 892  	PXOR      X8, X10 // finish the Karatsuba middle term
 893  	PXOR      X9, X10
 894  	MOVOU     X10, X11
 895  	PSRLDQ    $0x08, X10
 896  	PSLLDQ    $0x08, X11
 897  	PXOR      X10, X9
 898  	PXOR      X11, X8
 899  	MOVOU     X14, X11 // first reduction fold
 900  	PCLMULQDQ $0x01, X8, X11
 901  	PSHUFD    $0x4e, X8, X8
 902  	PXOR      X11, X8
 903  	MOVOU     144(AX), X11 // AES round 9
 904  	AESENC    X11, X0
 905  	AESENC    X11, X1
 906  	AESENC    X11, X2
 907  	AESENC    X11, X3
 908  	AESENC    X11, X4
 909  	AESENC    X11, X5
 910  	AESENC    X11, X6
 911  	AESENC    X11, X7
 912  	MOVOU     X14, X11 // second reduction fold
 913  	PCLMULQDQ $0x01, X8, X11
 914  	PSHUFD    $0x4e, X8, X8
 915  	PXOR      X11, X8
 916  	PXOR      X9, X8 // X8 = accumulator after the previous batch
 917  	MOVOU     160(AX), X11
 918  	CMPQ      R13, $0x0c // flags from this compare also feed the JE below
 919  	JB        encLast2
 920  	AESENC    X11, X0
 921  	AESENC    X11, X1
 922  	AESENC    X11, X2
 923  	AESENC    X11, X3
 924  	AESENC    X11, X4
 925  	AESENC    X11, X5
 926  	AESENC    X11, X6
 927  	AESENC    X11, X7
 928  	MOVOU     176(AX), X11
 929  	AESENC    X11, X0
 930  	AESENC    X11, X1
 931  	AESENC    X11, X2
 932  	AESENC    X11, X3
 933  	AESENC    X11, X4
 934  	AESENC    X11, X5
 935  	AESENC    X11, X6
 936  	AESENC    X11, X7
 937  	MOVOU     192(AX), X11
 938  	JE        encLast2
 939  	AESENC    X11, X0
 940  	AESENC    X11, X1
 941  	AESENC    X11, X2
 942  	AESENC    X11, X3
 943  	AESENC    X11, X4
 944  	AESENC    X11, X5
 945  	AESENC    X11, X6
 946  	AESENC    X11, X7
 947  	MOVOU     208(AX), X11
 948  	AESENC    X11, X0
 949  	AESENC    X11, X1
 950  	AESENC    X11, X2
 951  	AESENC    X11, X3
 952  	AESENC    X11, X4
 953  	AESENC    X11, X5
 954  	AESENC    X11, X6
 955  	AESENC    X11, X7
 956  	MOVOU     224(AX), X11
 957  
 958  encLast2:
      	// Same epilogue as encLast1: finish AES, XOR with src, store to dst and
      	// save byte-reversed ciphertext for the next hashing pass.
 959  	AESENCLAST X11, X0
 960  	AESENCLAST X11, X1
 961  	AESENCLAST X11, X2
 962  	AESENCLAST X11, X3
 963  	AESENCLAST X11, X4
 964  	AESENCLAST X11, X5
 965  	AESENCLAST X11, X6
 966  	AESENCLAST X11, X7
 967  	MOVOU      (SI), X11
 968  	PXOR       X11, X0
 969  	MOVOU      16(SI), X11
 970  	PXOR       X11, X1
 971  	MOVOU      32(SI), X11
 972  	PXOR       X11, X2
 973  	MOVOU      48(SI), X11
 974  	PXOR       X11, X3
 975  	MOVOU      64(SI), X11
 976  	PXOR       X11, X4
 977  	MOVOU      80(SI), X11
 978  	PXOR       X11, X5
 979  	MOVOU      96(SI), X11
 980  	PXOR       X11, X6
 981  	MOVOU      112(SI), X11
 982  	PXOR       X11, X7
 983  	MOVOU      X0, (DX)
 984  	PSHUFB     X15, X0
 985  	PXOR       X8, X0 // fold the accumulator into the first block of the batch
 986  	MOVOU      X1, 16(DX)
 987  	PSHUFB     X15, X1
 988  	MOVOU      X2, 32(DX)
 989  	PSHUFB     X15, X2
 990  	MOVOU      X3, 48(DX)
 991  	PSHUFB     X15, X3
 992  	MOVOU      X4, 64(DX)
 993  	PSHUFB     X15, X4
 994  	MOVOU      X5, 80(DX)
 995  	PSHUFB     X15, X5
 996  	MOVOU      X6, 96(DX)
 997  	PSHUFB     X15, X6
 998  	MOVOU      X7, 112(DX)
 999  	PSHUFB     X15, X7
1000  	MOVOU      X0, (SP)
1001  	MOVOU      X1, 16(SP)
1002  	MOVOU      X2, 32(SP)
1003  	MOVOU      X3, 48(SP)
1004  	MOVOU      X4, 64(SP)
1005  	MOVOU      X5, 80(SP)
1006  	MOVOU      X6, 96(SP)
1007  	MOVOU      X7, 112(SP)
1008  	LEAQ       128(SI), SI
1009  	LEAQ       128(DX), DX
1010  	JMP        gcmAesEncOctetsLoop
1011  
1012  gcmAesEncOctetsEnd:
      	// No further 8-block batch: hash the eight ciphertext blocks still saved
      	// at 0..127(SP), with a single reduction at the end.
1013  	MOVOU     (SP), X11
1014  	MOVOU     (DI), X8
1015  	MOVOU     16(DI), X10
1016  	MOVOU     X8, X9
1017  	PSHUFD    $0x4e, X11, X12
1018  	PXOR      X11, X12
1019  	PCLMULQDQ $0x00, X11, X8
1020  	PCLMULQDQ $0x11, X11, X9
1021  	PCLMULQDQ $0x00, X12, X10
1022  	MOVOU     16(SP), X11 // saved block 1
1023  	MOVOU     32(DI), X12
1024  	MOVOU     X12, X13
1025  	PCLMULQDQ $0x00, X11, X12
1026  	PXOR      X12, X8
1027  	PCLMULQDQ $0x11, X11, X13
1028  	PXOR      X13, X9
1029  	PSHUFD    $0x4e, X11, X12
1030  	PXOR      X12, X11
1031  	MOVOU     48(DI), X12
1032  	PCLMULQDQ $0x00, X11, X12
1033  	PXOR      X12, X10
1034  	MOVOU     32(SP), X11 // saved block 2
1035  	MOVOU     64(DI), X12
1036  	MOVOU     X12, X13
1037  	PCLMULQDQ $0x00, X11, X12
1038  	PXOR      X12, X8
1039  	PCLMULQDQ $0x11, X11, X13
1040  	PXOR      X13, X9
1041  	PSHUFD    $0x4e, X11, X12
1042  	PXOR      X12, X11
1043  	MOVOU     80(DI), X12
1044  	PCLMULQDQ $0x00, X11, X12
1045  	PXOR      X12, X10
1046  	MOVOU     48(SP), X11 // saved block 3
1047  	MOVOU     96(DI), X12
1048  	MOVOU     X12, X13
1049  	PCLMULQDQ $0x00, X11, X12
1050  	PXOR      X12, X8
1051  	PCLMULQDQ $0x11, X11, X13
1052  	PXOR      X13, X9
1053  	PSHUFD    $0x4e, X11, X12
1054  	PXOR      X12, X11
1055  	MOVOU     112(DI), X12
1056  	PCLMULQDQ $0x00, X11, X12
1057  	PXOR      X12, X10
1058  	MOVOU     64(SP), X11 // saved block 4
1059  	MOVOU     128(DI), X12
1060  	MOVOU     X12, X13
1061  	PCLMULQDQ $0x00, X11, X12
1062  	PXOR      X12, X8
1063  	PCLMULQDQ $0x11, X11, X13
1064  	PXOR      X13, X9
1065  	PSHUFD    $0x4e, X11, X12
1066  	PXOR      X12, X11
1067  	MOVOU     144(DI), X12
1068  	PCLMULQDQ $0x00, X11, X12
1069  	PXOR      X12, X10
1070  	MOVOU     80(SP), X11 // saved block 5
1071  	MOVOU     160(DI), X12
1072  	MOVOU     X12, X13
1073  	PCLMULQDQ $0x00, X11, X12
1074  	PXOR      X12, X8
1075  	PCLMULQDQ $0x11, X11, X13
1076  	PXOR      X13, X9
1077  	PSHUFD    $0x4e, X11, X12
1078  	PXOR      X12, X11
1079  	MOVOU     176(DI), X12
1080  	PCLMULQDQ $0x00, X11, X12
1081  	PXOR      X12, X10
1082  	MOVOU     96(SP), X11 // saved block 6
1083  	MOVOU     192(DI), X12
1084  	MOVOU     X12, X13
1085  	PCLMULQDQ $0x00, X11, X12
1086  	PXOR      X12, X8
1087  	PCLMULQDQ $0x11, X11, X13
1088  	PXOR      X13, X9
1089  	PSHUFD    $0x4e, X11, X12
1090  	PXOR      X12, X11
1091  	MOVOU     208(DI), X12
1092  	PCLMULQDQ $0x00, X11, X12
1093  	PXOR      X12, X10
1094  	MOVOU     112(SP), X11 // saved block 7
1095  	MOVOU     224(DI), X12
1096  	MOVOU     X12, X13
1097  	PCLMULQDQ $0x00, X11, X12
1098  	PXOR      X12, X8
1099  	PCLMULQDQ $0x11, X11, X13
1100  	PXOR      X13, X9
1101  	PSHUFD    $0x4e, X11, X12
1102  	PXOR      X12, X11
1103  	MOVOU     240(DI), X12
1104  	PCLMULQDQ $0x00, X11, X12
1105  	PXOR      X12, X10
1106  	PXOR      X8, X10 // finish the Karatsuba middle term
1107  	PXOR      X9, X10
1108  	MOVOU     X10, X11
1109  	PSRLDQ    $0x08, X10
1110  	PSLLDQ    $0x08, X11
1111  	PXOR      X10, X9
1112  	PXOR      X11, X8
1113  	MOVOU     X14, X11 // two-step reduction
1114  	PCLMULQDQ $0x01, X8, X11
1115  	PSHUFD    $0x4e, X8, X8
1116  	PXOR      X11, X8
1117  	MOVOU     X14, X11
1118  	PCLMULQDQ $0x01, X8, X11
1119  	PSHUFD    $0x4e, X8, X8
1120  	PXOR      X11, X8
1121  	PXOR      X9, X8 // X8 = accumulator covering everything written so far
1122  	TESTQ     R9, R9
1123  	JE        gcmAesEncDone
1124  	SUBQ      $0x07, R10 // slots 1..7 were pre-filled for a batch that will not run; rewind so R10 matches slot 0
1125  
1126  gcmAesEncSingles:
      	// One block at a time. Round keys 1..7 stay in X1..X7 and the multiplier
      	// from 224(DI) stays in X13; later round keys are reloaded per block.
1127  	MOVOU 16(AX), X1
1128  	MOVOU 32(AX), X2
1129  	MOVOU 48(AX), X3
1130  	MOVOU 64(AX), X4
1131  	MOVOU 80(AX), X5
1132  	MOVOU 96(AX), X6
1133  	MOVOU 112(AX), X7
1134  	MOVOU 224(DI), X13
1135  
1136  gcmAesEncSinglesLoop:
1137  	CMPQ   R9, $0x10
1138  	JB     gcmAesEncTail
1139  	SUBQ   $0x10, R9
1140  	MOVOU  128(SP), X0 // current counter ^ round key 0
1141  	ADDL   $0x01, R10 // prepare slot 0 for the following block
1142  	MOVL   R10, R11
1143  	XORL   R12, R11
1144  	BSWAPL R11
1145  	MOVL   R11, 140(SP)
1146  	AESENC X1, X0
1147  	AESENC X2, X0
1148  	AESENC X3, X0
1149  	AESENC X4, X0
1150  	AESENC X5, X0
1151  	AESENC X6, X0
1152  	AESENC X7, X0
1153  	MOVOU  128(AX), X11
1154  	AESENC X11, X0
1155  	MOVOU  144(AX), X11
1156  	AESENC X11, X0
1157  	MOVOU  160(AX), X11
1158  	CMPQ   R13, $0x0c // flags from this compare also feed the JE below
1159  	JB     encLast3
1160  	AESENC X11, X0
1161  	MOVOU  176(AX), X11
1162  	AESENC X11, X0
1163  	MOVOU  192(AX), X11
1164  	JE     encLast3
1165  	AESENC X11, X0
1166  	MOVOU  208(AX), X11
1167  	AESENC X11, X0
1168  	MOVOU  224(AX), X11
1169  
1170  encLast3:
1171  	AESENCLAST X11, X0
1172  	MOVOU      (SI), X11
1173  	PXOR       X11, X0 // ciphertext = keystream ^ plaintext
1174  	MOVOU      X0, (DX)
1175  	PSHUFB     X15, X0
1176  	PXOR       X8, X0 // fold in the accumulator
1177  	MOVOU      X13, X8 // multiply by the entry from 224(DI) ...
1178  	MOVOU      X13, X9
1179  	MOVOU      240(DI), X10 // ... using its hi^lo helper
1180  	PSHUFD     $0x4e, X0, X11
1181  	PXOR       X0, X11
1182  	PCLMULQDQ  $0x00, X0, X8
1183  	PCLMULQDQ  $0x11, X0, X9
1184  	PCLMULQDQ  $0x00, X11, X10
1185  	PXOR       X8, X10
1186  	PXOR       X9, X10
1187  	MOVOU      X10, X11
1188  	PSRLDQ     $0x08, X10
1189  	PSLLDQ     $0x08, X11
1190  	PXOR       X10, X9
1191  	PXOR       X11, X8
1192  	MOVOU      X14, X11 // two-step reduction
1193  	PCLMULQDQ  $0x01, X8, X11
1194  	PSHUFD     $0x4e, X8, X8
1195  	PXOR       X11, X8
1196  	MOVOU      X14, X11
1197  	PCLMULQDQ  $0x01, X8, X11
1198  	PSHUFD     $0x4e, X8, X8
1199  	PXOR       X11, X8
1200  	PXOR       X9, X8
1201  	LEAQ       16(SI), SI
1202  	LEAQ       16(DX), DX
1203  	JMP        gcmAesEncSinglesLoop
1204  
1205  gcmAesEncTail:
      	// 1..15 bytes left: encrypt one more counter block, XOR it with the
      	// zero-padded plaintext remainder and mask off the unused bytes.
1206  	TESTQ  R9, R9
1207  	JE     gcmAesEncDone
1208  	MOVOU  128(SP), X0
1209  	AESENC X1, X0
1210  	AESENC X2, X0
1211  	AESENC X3, X0
1212  	AESENC X4, X0
1213  	AESENC X5, X0
1214  	AESENC X6, X0
1215  	AESENC X7, X0
1216  	MOVOU  128(AX), X11
1217  	AESENC X11, X0
1218  	MOVOU  144(AX), X11
1219  	AESENC X11, X0
1220  	MOVOU  160(AX), X11
1221  	CMPQ   R13, $0x0c // flags from this compare also feed the JE below
1222  	JB     encLast4
1223  	AESENC X11, X0
1224  	MOVOU  176(AX), X11
1225  	AESENC X11, X0
1226  	MOVOU  192(AX), X11
1227  	JE     encLast4
1228  	AESENC X11, X0
1229  	MOVOU  208(AX), X11
1230  	AESENC X11, X0
1231  	MOVOU  224(AX), X11
1232  
1233  encLast4:
1234  	AESENCLAST X11, X0
1235  	MOVOU      X0, X11 // X11 = keystream block
1236  	LEAQ       -1(SI)(R9*1), SI // SI = address of the last plaintext byte
1237  	MOVQ       R9, R11
1238  	SHLQ       $0x04, R11 // R11 = 16 * bytes left
1239  	LEAQ       andMask<>+0(SB), R10 // R10 is reused as a table base; the counter is no longer needed
1240  	MOVOU      -16(R10)(R11*1), X12 // X12 = andMask entry number (bytes left - 1)
1241  	PXOR       X0, X0
1242  
1243  ptxLoadLoop:
      	// Walk backwards so the first remaining byte ends up in byte 0 of X0.
1244  	PSLLDQ    $0x01, X0
1245  	PINSRB    $0x00, (SI), X0
1246  	LEAQ      -1(SI), SI
1247  	DECQ      R9
1248  	JNE       ptxLoadLoop
1249  	PXOR      X11, X0 // XOR with the keystream
1250  	PAND      X12, X0 // presumably clears the bytes past the message end; andMask is only partly visible here
1251  	MOVOU     X0, (DX) // writes a full 16 bytes -- NOTE(review): assumes dst has room for a whole block; confirm against caller
1252  	PSHUFB    X15, X0
1253  	PXOR      X8, X0 // fold in the accumulator
1254  	MOVOU     X13, X8
1255  	MOVOU     X13, X9
1256  	MOVOU     240(DI), X10
1257  	PSHUFD    $0x4e, X0, X11
1258  	PXOR      X0, X11
1259  	PCLMULQDQ $0x00, X0, X8
1260  	PCLMULQDQ $0x11, X0, X9
1261  	PCLMULQDQ $0x00, X11, X10
1262  	PXOR      X8, X10
1263  	PXOR      X9, X10
1264  	MOVOU     X10, X11
1265  	PSRLDQ    $0x08, X10
1266  	PSLLDQ    $0x08, X11
1267  	PXOR      X10, X9
1268  	PXOR      X11, X8
1269  	MOVOU     X14, X11 // two-step reduction
1270  	PCLMULQDQ $0x01, X8, X11
1271  	PSHUFD    $0x4e, X8, X8
1272  	PXOR      X11, X8
1273  	MOVOU     X14, X11
1274  	PCLMULQDQ $0x01, X8, X11
1275  	PSHUFD    $0x4e, X8, X8
1276  	PXOR      X11, X8
1277  	PXOR      X9, X8
1278  
1279  gcmAesEncDone:
1280  	MOVOU X8, (R8) // *T = updated accumulator
1281  	RET
1282  
// andMask is a read-only table of fifteen 16-byte AND masks. The mask stored
// at byte offset 16*(n-1) has 0xff in its n lowest-addressed bytes and zero in
// the remaining 16-n bytes (n = 1..15; each /8 value is stored little-endian).
// The partial-block code in this file loads -16(base)(n*16), with n = number
// of remaining bytes, to keep only the first n bytes of an XMM register.
1283  DATA andMask<>+0(SB)/8, $0x00000000000000ff // n = 1
1284  DATA andMask<>+8(SB)/8, $0x0000000000000000
1285  DATA andMask<>+16(SB)/8, $0x000000000000ffff // n = 2
1286  DATA andMask<>+24(SB)/8, $0x0000000000000000
1287  DATA andMask<>+32(SB)/8, $0x0000000000ffffff // n = 3
1288  DATA andMask<>+40(SB)/8, $0x0000000000000000
1289  DATA andMask<>+48(SB)/8, $0x00000000ffffffff // n = 4
1290  DATA andMask<>+56(SB)/8, $0x0000000000000000
1291  DATA andMask<>+64(SB)/8, $0x000000ffffffffff // n = 5
1292  DATA andMask<>+72(SB)/8, $0x0000000000000000
1293  DATA andMask<>+80(SB)/8, $0x0000ffffffffffff // n = 6
1294  DATA andMask<>+88(SB)/8, $0x0000000000000000
1295  DATA andMask<>+96(SB)/8, $0x00ffffffffffffff // n = 7
1296  DATA andMask<>+104(SB)/8, $0x0000000000000000
1297  DATA andMask<>+112(SB)/8, $0xffffffffffffffff // n = 8
1298  DATA andMask<>+120(SB)/8, $0x0000000000000000
1299  DATA andMask<>+128(SB)/8, $0xffffffffffffffff // n = 9
1300  DATA andMask<>+136(SB)/8, $0x00000000000000ff
1301  DATA andMask<>+144(SB)/8, $0xffffffffffffffff // n = 10
1302  DATA andMask<>+152(SB)/8, $0x000000000000ffff
1303  DATA andMask<>+160(SB)/8, $0xffffffffffffffff // n = 11
1304  DATA andMask<>+168(SB)/8, $0x0000000000ffffff
1305  DATA andMask<>+176(SB)/8, $0xffffffffffffffff // n = 12
1306  DATA andMask<>+184(SB)/8, $0x00000000ffffffff
1307  DATA andMask<>+192(SB)/8, $0xffffffffffffffff // n = 13
1308  DATA andMask<>+200(SB)/8, $0x000000ffffffffff
1309  DATA andMask<>+208(SB)/8, $0xffffffffffffffff // n = 14
1310  DATA andMask<>+216(SB)/8, $0x0000ffffffffffff
1311  DATA andMask<>+224(SB)/8, $0xffffffffffffffff // n = 15
1312  DATA andMask<>+232(SB)/8, $0x00ffffffffffffff
1313  GLOBL andMask<>(SB), RODATA|NOPTR, $240 // 15 masks * 16 bytes
1314  
1315  // func gcmAesDec(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
1316  // Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // gcmAesDec consumes src_len bytes of src. Each 16-byte block of src is first
      // folded into the 128-bit accumulator loaded from T (byte-reversed with
      // bswapMask, carry-less multiplied by productTable entries, reduced with
      // gcmPoly) and then XORed with an AES-encrypted counter block to produce dst.
      // A final partial block (< 16 bytes) is masked with andMask and handled the
      // same way. The accumulator is written back to T on exit; ctr is only read.
      //
      // Register roles:
      //   DI  productTable base      SI  dst cursor        DX  src cursor
      //   R9  bytes of src left      R8  T                 AX  ks base
      //   R13 ks_len/4 - 1; compared with 12 to select 10, 12 or 14 AES rounds
      //   R10 running 32-bit counter (ctr[12:16] byte-swapped), R11 scratch
      //   R12 ks[3] byte-swapped, XORed into the counter before it is stored
      //   X8 / X9 / X10  accumulator: low product, high product, middle term
      //   X14 gcmPoly, X15 bswapMask, X11-X13 temporaries, X0-X7 AES state
      //   0..127(SP)     eight 16-byte counter slots, already XORed with ks[0:16]
1317  TEXT ·gcmAesDec(SB), $128-96
1318  	MOVQ   productTable+0(FP), DI
1319  	MOVQ   dst_base+8(FP), SI
1320  	MOVQ   src_base+32(FP), DX
1321  	MOVQ   src_len+40(FP), R9
1322  	MOVQ   ctr+56(FP), CX
1323  	MOVQ   T+64(FP), R8
1324  	MOVQ   ks_base+72(FP), AX
1325  	MOVQ   ks_len+80(FP), R13
1326  	SHRQ   $0x02, R13 // R13 = ks_len / 4
1327  	DECQ   R13 // R13 = ks_len/4 - 1; tested against 12 before the final rounds
1328  	MOVOU  bswapMask<>+0(SB), X15
1329  	MOVOU  gcmPoly<>+0(SB), X14
1330  	MOVOU  (R8), X8 // current accumulator from T
1331  	PXOR   X9, X9
1332  	PXOR   X10, X10
1333  	MOVOU  (CX), X0 // counter block
1334  	MOVL   12(CX), R10 // last 32-bit word of the counter block
1335  	MOVOU  (AX), X11 // first 16 bytes of ks
1336  	MOVL   12(AX), R12 // ks[3]
1337  	BSWAPL R10
1338  	BSWAPL R12
1339  	PXOR   X0, X11 // counter block XOR ks[0:16], so the first AESENC can use 16(AX) directly
1340  	MOVOU  X11, (SP) // slot 0
1341  	ADDL   $0x01, R10 // counter increment: this five-instruction pattern recurs for every slot
1342  	MOVL   R10, R11
1343  	XORL   R12, R11 // apply ks[3] to the new counter word (both still byte-swapped)
1344  	BSWAPL R11
1345  	MOVL   R11, 12(SP) // overwrite the last word of slot 0 with the incremented counter
1346  	CMPQ   R9, $0x80
1347  	JB     gcmAesDecSingles // fewer than 128 bytes: only slot 0 is needed
1348  	MOVOU  X11, 16(SP) // slot 1
1349  	ADDL   $0x01, R10
1350  	MOVL   R10, R11
1351  	XORL   R12, R11
1352  	BSWAPL R11
1353  	MOVL   R11, 28(SP)
1354  	MOVOU  X11, 32(SP) // slot 2
1355  	ADDL   $0x01, R10
1356  	MOVL   R10, R11
1357  	XORL   R12, R11
1358  	BSWAPL R11
1359  	MOVL   R11, 44(SP)
1360  	MOVOU  X11, 48(SP) // slot 3
1361  	ADDL   $0x01, R10
1362  	MOVL   R10, R11
1363  	XORL   R12, R11
1364  	BSWAPL R11
1365  	MOVL   R11, 60(SP)
1366  	MOVOU  X11, 64(SP) // slot 4
1367  	ADDL   $0x01, R10
1368  	MOVL   R10, R11
1369  	XORL   R12, R11
1370  	BSWAPL R11
1371  	MOVL   R11, 76(SP)
1372  	MOVOU  X11, 80(SP) // slot 5
1373  	ADDL   $0x01, R10
1374  	MOVL   R10, R11
1375  	XORL   R12, R11
1376  	BSWAPL R11
1377  	MOVL   R11, 92(SP)
1378  	MOVOU  X11, 96(SP) // slot 6
1379  	ADDL   $0x01, R10
1380  	MOVL   R10, R11
1381  	XORL   R12, R11
1382  	BSWAPL R11
1383  	MOVL   R11, 108(SP)
1384  	MOVOU  X11, 112(SP) // slot 7
1385  	ADDL   $0x01, R10
1386  	MOVL   R10, R11
1387  	XORL   R12, R11
1388  	BSWAPL R11
1389  	MOVL   R11, 124(SP)
1390  
      // Main loop: 128 bytes (eight blocks) per iteration. AES rounds on the eight
      // counter blocks are interleaved with hashing of the eight src blocks, and
      // each counter slot is refreshed for the next iteration as soon as possible.
1391  gcmAesDecOctetsLoop:
1392  	CMPQ      R9, $0x80
1393  	JB        gcmAesDecEndOctets
1394  	SUBQ      $0x80, R9
1395  	MOVOU     (SP), X0
1396  	MOVOU     16(SP), X1
1397  	MOVOU     32(SP), X2
1398  	MOVOU     48(SP), X3
1399  	MOVOU     64(SP), X4
1400  	MOVOU     80(SP), X5
1401  	MOVOU     96(SP), X6
1402  	MOVOU     112(SP), X7
1403  	MOVOU     (DX), X11 // src block 0
1404  	PSHUFB    X15, X11
1405  	PXOR      X8, X11 // fold the running accumulator into the first block
1406  	PSHUFD    $0x4e, X11, X12 // swap 64-bit halves
1407  	PXOR      X11, X12 // high half XOR low half, for the middle term
1408  	MOVOU     (DI), X8
1409  	MOVOU     16(DI), X10
1410  	MOVOU     X8, X9
1411  	PCLMULQDQ $0x00, X12, X10 // middle term
1412  	PCLMULQDQ $0x00, X11, X8 // low halves
1413  	PCLMULQDQ $0x11, X11, X9 // high halves
      	// Round key 16(AX) on all eight states; src block 1 times table pair 32/48(DI).
1414  	MOVOU     16(AX), X11
1415  	AESENC    X11, X0
1416  	AESENC    X11, X1
1417  	AESENC    X11, X2
1418  	AESENC    X11, X3
1419  	MOVOU     32(DI), X12
1420  	MOVOU     X12, X13
1421  	AESENC    X11, X4
1422  	AESENC    X11, X5
1423  	AESENC    X11, X6
1424  	AESENC    X11, X7
1425  	MOVOU     16(DX), X11
1426  	PSHUFB    X15, X11
1427  	PCLMULQDQ $0x00, X11, X12
1428  	PXOR      X12, X8 // accumulate low product
1429  	PSHUFD    $0x4e, X11, X12
1430  	PCLMULQDQ $0x11, X11, X13
1431  	PXOR      X12, X11
1432  	PXOR      X13, X9 // accumulate high product
1433  	MOVOU     48(DI), X13
1434  	PCLMULQDQ $0x00, X13, X11
1435  	PXOR      X11, X10 // accumulate middle term
1436  	ADDL      $0x01, R10
1437  	MOVL      R10, R11
1438  	XORL      R12, R11
1439  	BSWAPL    R11
1440  	MOVL      R11, 12(SP) // refresh slot 0 for the next iteration
      	// Round key 32(AX); src block 2 times table pair 64/80(DI).
1441  	MOVOU     32(AX), X11
1442  	AESENC    X11, X0
1443  	AESENC    X11, X1
1444  	AESENC    X11, X2
1445  	AESENC    X11, X3
1446  	MOVOU     64(DI), X12
1447  	MOVOU     X12, X13
1448  	AESENC    X11, X4
1449  	AESENC    X11, X5
1450  	AESENC    X11, X6
1451  	AESENC    X11, X7
1452  	MOVOU     32(DX), X11
1453  	PSHUFB    X15, X11
1454  	PCLMULQDQ $0x00, X11, X12
1455  	PXOR      X12, X8
1456  	PSHUFD    $0x4e, X11, X12
1457  	PCLMULQDQ $0x11, X11, X13
1458  	PXOR      X12, X11
1459  	PXOR      X13, X9
1460  	MOVOU     80(DI), X13
1461  	PCLMULQDQ $0x00, X13, X11
1462  	PXOR      X11, X10
1463  	ADDL      $0x01, R10
1464  	MOVL      R10, R11
1465  	XORL      R12, R11
1466  	BSWAPL    R11
1467  	MOVL      R11, 28(SP) // refresh slot 1
      	// Round key 48(AX); src block 3 times table pair 96/112(DI).
1468  	MOVOU     48(AX), X11
1469  	AESENC    X11, X0
1470  	AESENC    X11, X1
1471  	AESENC    X11, X2
1472  	AESENC    X11, X3
1473  	MOVOU     96(DI), X12
1474  	MOVOU     X12, X13
1475  	AESENC    X11, X4
1476  	AESENC    X11, X5
1477  	AESENC    X11, X6
1478  	AESENC    X11, X7
1479  	MOVOU     48(DX), X11
1480  	PSHUFB    X15, X11
1481  	PCLMULQDQ $0x00, X11, X12
1482  	PXOR      X12, X8
1483  	PSHUFD    $0x4e, X11, X12
1484  	PCLMULQDQ $0x11, X11, X13
1485  	PXOR      X12, X11
1486  	PXOR      X13, X9
1487  	MOVOU     112(DI), X13
1488  	PCLMULQDQ $0x00, X13, X11
1489  	PXOR      X11, X10
1490  	ADDL      $0x01, R10
1491  	MOVL      R10, R11
1492  	XORL      R12, R11
1493  	BSWAPL    R11
1494  	MOVL      R11, 44(SP) // refresh slot 2
      	// Round key 64(AX); src block 4 times table pair 128/144(DI).
1495  	MOVOU     64(AX), X11
1496  	AESENC    X11, X0
1497  	AESENC    X11, X1
1498  	AESENC    X11, X2
1499  	AESENC    X11, X3
1500  	MOVOU     128(DI), X12
1501  	MOVOU     X12, X13
1502  	AESENC    X11, X4
1503  	AESENC    X11, X5
1504  	AESENC    X11, X6
1505  	AESENC    X11, X7
1506  	MOVOU     64(DX), X11
1507  	PSHUFB    X15, X11
1508  	PCLMULQDQ $0x00, X11, X12
1509  	PXOR      X12, X8
1510  	PSHUFD    $0x4e, X11, X12
1511  	PCLMULQDQ $0x11, X11, X13
1512  	PXOR      X12, X11
1513  	PXOR      X13, X9
1514  	MOVOU     144(DI), X13
1515  	PCLMULQDQ $0x00, X13, X11
1516  	PXOR      X11, X10
1517  	ADDL      $0x01, R10
1518  	MOVL      R10, R11
1519  	XORL      R12, R11
1520  	BSWAPL    R11
1521  	MOVL      R11, 60(SP) // refresh slot 3
      	// Round key 80(AX); src block 5 times table pair 160/176(DI).
1522  	MOVOU     80(AX), X11
1523  	AESENC    X11, X0
1524  	AESENC    X11, X1
1525  	AESENC    X11, X2
1526  	AESENC    X11, X3
1527  	MOVOU     160(DI), X12
1528  	MOVOU     X12, X13
1529  	AESENC    X11, X4
1530  	AESENC    X11, X5
1531  	AESENC    X11, X6
1532  	AESENC    X11, X7
1533  	MOVOU     80(DX), X11
1534  	PSHUFB    X15, X11
1535  	PCLMULQDQ $0x00, X11, X12
1536  	PXOR      X12, X8
1537  	PSHUFD    $0x4e, X11, X12
1538  	PCLMULQDQ $0x11, X11, X13
1539  	PXOR      X12, X11
1540  	PXOR      X13, X9
1541  	MOVOU     176(DI), X13
1542  	PCLMULQDQ $0x00, X13, X11
1543  	PXOR      X11, X10
1544  	ADDL      $0x01, R10
1545  	MOVL      R10, R11
1546  	XORL      R12, R11
1547  	BSWAPL    R11
1548  	MOVL      R11, 76(SP) // refresh slot 4
      	// Round key 96(AX); src block 6 times table pair 192/208(DI).
1549  	MOVOU     96(AX), X11
1550  	AESENC    X11, X0
1551  	AESENC    X11, X1
1552  	AESENC    X11, X2
1553  	AESENC    X11, X3
1554  	MOVOU     192(DI), X12
1555  	MOVOU     X12, X13
1556  	AESENC    X11, X4
1557  	AESENC    X11, X5
1558  	AESENC    X11, X6
1559  	AESENC    X11, X7
1560  	MOVOU     96(DX), X11
1561  	PSHUFB    X15, X11
1562  	PCLMULQDQ $0x00, X11, X12
1563  	PXOR      X12, X8
1564  	PSHUFD    $0x4e, X11, X12
1565  	PCLMULQDQ $0x11, X11, X13
1566  	PXOR      X12, X11
1567  	PXOR      X13, X9
1568  	MOVOU     208(DI), X13
1569  	PCLMULQDQ $0x00, X13, X11
1570  	PXOR      X11, X10
1571  	ADDL      $0x01, R10
1572  	MOVL      R10, R11
1573  	XORL      R12, R11
1574  	BSWAPL    R11
1575  	MOVL      R11, 92(SP) // refresh slot 5
      	// Round key 112(AX); src block 7 times table pair 224/240(DI).
1576  	MOVOU     112(AX), X11
1577  	AESENC    X11, X0
1578  	AESENC    X11, X1
1579  	AESENC    X11, X2
1580  	AESENC    X11, X3
1581  	MOVOU     224(DI), X12
1582  	MOVOU     X12, X13
1583  	AESENC    X11, X4
1584  	AESENC    X11, X5
1585  	AESENC    X11, X6
1586  	AESENC    X11, X7
1587  	MOVOU     112(DX), X11
1588  	PSHUFB    X15, X11
1589  	PCLMULQDQ $0x00, X11, X12
1590  	PXOR      X12, X8
1591  	PSHUFD    $0x4e, X11, X12
1592  	PCLMULQDQ $0x11, X11, X13
1593  	PXOR      X12, X11
1594  	PXOR      X13, X9
1595  	MOVOU     240(DI), X13
1596  	PCLMULQDQ $0x00, X13, X11
1597  	PXOR      X11, X10
1598  	ADDL      $0x01, R10
1599  	MOVL      R10, R11
1600  	XORL      R12, R11
1601  	BSWAPL    R11
1602  	MOVL      R11, 108(SP) // refresh slot 6
      	// Round key 128(AX); all eight src blocks have been multiplied by now.
1603  	MOVOU     128(AX), X11
1604  	AESENC    X11, X0
1605  	AESENC    X11, X1
1606  	AESENC    X11, X2
1607  	AESENC    X11, X3
1608  	AESENC    X11, X4
1609  	AESENC    X11, X5
1610  	AESENC    X11, X6
1611  	AESENC    X11, X7
1612  	ADDL      $0x01, R10
1613  	MOVL      R10, R11
1614  	XORL      R12, R11
1615  	BSWAPL    R11
1616  	MOVL      R11, 124(SP) // refresh slot 7
      	// Combine the three partial products into a 256-bit value (X9:X8), then
      	// reduce it in two folding steps with gcmPoly, interleaved with round key 144(AX).
1617  	PXOR      X8, X10
1618  	PXOR      X9, X10
1619  	MOVOU     X10, X11
1620  	PSRLDQ    $0x08, X10
1621  	PSLLDQ    $0x08, X11
1622  	PXOR      X10, X9
1623  	PXOR      X11, X8
1624  	MOVOU     X14, X11
1625  	PCLMULQDQ $0x01, X8, X11 // first folding step
1626  	PSHUFD    $0x4e, X8, X8
1627  	PXOR      X11, X8
1628  	MOVOU     144(AX), X11
1629  	AESENC    X11, X0
1630  	AESENC    X11, X1
1631  	AESENC    X11, X2
1632  	AESENC    X11, X3
1633  	AESENC    X11, X4
1634  	AESENC    X11, X5
1635  	AESENC    X11, X6
1636  	AESENC    X11, X7
1637  	MOVOU     X14, X11
1638  	PCLMULQDQ $0x01, X8, X11 // second folding step
1639  	PSHUFD    $0x4e, X8, X8
1640  	PXOR      X11, X8
1641  	PXOR      X9, X8 // X8 = reduced accumulator after these eight blocks
1642  	MOVOU     160(AX), X11
1643  	CMPQ      R13, $0x0c // flags from this compare are also consumed by the JE below
1644  	JB        decLast1 // R13 < 12: 160(AX) is the last round key
1645  	AESENC    X11, X0
1646  	AESENC    X11, X1
1647  	AESENC    X11, X2
1648  	AESENC    X11, X3
1649  	AESENC    X11, X4
1650  	AESENC    X11, X5
1651  	AESENC    X11, X6
1652  	AESENC    X11, X7
1653  	MOVOU     176(AX), X11
1654  	AESENC    X11, X0
1655  	AESENC    X11, X1
1656  	AESENC    X11, X2
1657  	AESENC    X11, X3
1658  	AESENC    X11, X4
1659  	AESENC    X11, X5
1660  	AESENC    X11, X6
1661  	AESENC    X11, X7
1662  	MOVOU     192(AX), X11
1663  	JE        decLast1 // R13 == 12: 192(AX) is the last round key (AESENC/MOVOU leave flags intact)
1664  	AESENC    X11, X0
1665  	AESENC    X11, X1
1666  	AESENC    X11, X2
1667  	AESENC    X11, X3
1668  	AESENC    X11, X4
1669  	AESENC    X11, X5
1670  	AESENC    X11, X6
1671  	AESENC    X11, X7
1672  	MOVOU     208(AX), X11
1673  	AESENC    X11, X0
1674  	AESENC    X11, X1
1675  	AESENC    X11, X2
1676  	AESENC    X11, X3
1677  	AESENC    X11, X4
1678  	AESENC    X11, X5
1679  	AESENC    X11, X6
1680  	AESENC    X11, X7
1681  	MOVOU     224(AX), X11 // otherwise 224(AX) is the last round key
1682  
      // Final AES round for the eight blocks, then dst = keystream XOR src.
      // All eight src blocks are loaded before the first store to dst.
1683  decLast1:
1684  	AESENCLAST X11, X0
1685  	AESENCLAST X11, X1
1686  	AESENCLAST X11, X2
1687  	AESENCLAST X11, X3
1688  	AESENCLAST X11, X4
1689  	AESENCLAST X11, X5
1690  	AESENCLAST X11, X6
1691  	AESENCLAST X11, X7
1692  	MOVOU      (DX), X11
1693  	PXOR       X11, X0
1694  	MOVOU      16(DX), X11
1695  	PXOR       X11, X1
1696  	MOVOU      32(DX), X11
1697  	PXOR       X11, X2
1698  	MOVOU      48(DX), X11
1699  	PXOR       X11, X3
1700  	MOVOU      64(DX), X11
1701  	PXOR       X11, X4
1702  	MOVOU      80(DX), X11
1703  	PXOR       X11, X5
1704  	MOVOU      96(DX), X11
1705  	PXOR       X11, X6
1706  	MOVOU      112(DX), X11
1707  	PXOR       X11, X7
1708  	MOVOU      X0, (SI)
1709  	MOVOU      X1, 16(SI)
1710  	MOVOU      X2, 32(SI)
1711  	MOVOU      X3, 48(SI)
1712  	MOVOU      X4, 64(SI)
1713  	MOVOU      X5, 80(SI)
1714  	MOVOU      X6, 96(SI)
1715  	MOVOU      X7, 112(SI)
1716  	LEAQ       128(SI), SI
1717  	LEAQ       128(DX), DX
1718  	JMP        gcmAesDecOctetsLoop
1719  
      // The eight slots hold counters that were prepared but not consumed; rewind
      // R10 by 7 so it matches the counter stored in slot 0, the next one used.
1720  gcmAesDecEndOctets:
1721  	SUBQ $0x07, R10
1722  
      // Single-block path: cache round keys 16..112(AX) in X1-X7 and the
      // productTable entry at 224(DI) in X13 for the loops below.
1723  gcmAesDecSingles:
1724  	MOVOU 16(AX), X1
1725  	MOVOU 32(AX), X2
1726  	MOVOU 48(AX), X3
1727  	MOVOU 64(AX), X4
1728  	MOVOU 80(AX), X5
1729  	MOVOU 96(AX), X6
1730  	MOVOU 112(AX), X7
1731  	MOVOU 224(DI), X13
1732  
      // One 16-byte block per iteration: hash the src block, then decrypt it.
1733  gcmAesDecSinglesLoop:
1734  	CMPQ      R9, $0x10
1735  	JB        gcmAesDecTail
1736  	SUBQ      $0x10, R9
1737  	MOVOU     (DX), X0
1738  	MOVOU     X0, X12 // keep the raw src block for the final XOR
1739  	PSHUFB    X15, X0
1740  	PXOR      X8, X0 // fold in the running accumulator
1741  	MOVOU     X13, X8
1742  	MOVOU     X13, X9
1743  	MOVOU     240(DI), X10
1744  	PCLMULQDQ $0x00, X0, X8 // low halves
1745  	PCLMULQDQ $0x11, X0, X9 // high halves
1746  	PSHUFD    $0x4e, X0, X11
1747  	PXOR      X0, X11
1748  	PCLMULQDQ $0x00, X11, X10 // middle term
1749  	PXOR      X8, X10
1750  	PXOR      X9, X10
1751  	MOVOU     X10, X11
1752  	PSRLDQ    $0x08, X10
1753  	PSLLDQ    $0x08, X11
1754  	PXOR      X10, X9
1755  	PXOR      X11, X8
1756  	MOVOU     X14, X11
1757  	PCLMULQDQ $0x01, X8, X11 // first folding step with gcmPoly
1758  	PSHUFD    $0x4e, X8, X8
1759  	PXOR      X11, X8
1760  	MOVOU     X14, X11
1761  	PCLMULQDQ $0x01, X8, X11 // second folding step
1762  	PSHUFD    $0x4e, X8, X8
1763  	PXOR      X11, X8
1764  	PXOR      X9, X8 // X8 = updated accumulator
1765  	MOVOU     (SP), X0 // counter block for this src block
1766  	ADDL      $0x01, R10
1767  	MOVL      R10, R11
1768  	XORL      R12, R11
1769  	BSWAPL    R11
1770  	MOVL      R11, 12(SP) // slot 0 now holds the next counter
1771  	AESENC    X1, X0
1772  	AESENC    X2, X0
1773  	AESENC    X3, X0
1774  	AESENC    X4, X0
1775  	AESENC    X5, X0
1776  	AESENC    X6, X0
1777  	AESENC    X7, X0
1778  	MOVOU     128(AX), X11
1779  	AESENC    X11, X0
1780  	MOVOU     144(AX), X11
1781  	AESENC    X11, X0
1782  	MOVOU     160(AX), X11
1783  	CMPQ      R13, $0x0c // same round-count selection as in the main loop
1784  	JB        decLast2
1785  	AESENC    X11, X0
1786  	MOVOU     176(AX), X11
1787  	AESENC    X11, X0
1788  	MOVOU     192(AX), X11
1789  	JE        decLast2
1790  	AESENC    X11, X0
1791  	MOVOU     208(AX), X11
1792  	AESENC    X11, X0
1793  	MOVOU     224(AX), X11
1794  
1795  decLast2:
1796  	AESENCLAST X11, X0
1797  	PXOR       X12, X0 // keystream XOR saved src block
1798  	MOVOU      X0, (SI)
1799  	LEAQ       16(SI), SI
1800  	LEAQ       16(DX), DX
1801  	JMP        gcmAesDecSinglesLoop
1802  
      // Partial final block: 0 < R9 < 16 bytes remain (0 skips straight to the exit).
1803  gcmAesDecTail:
1804  	TESTQ     R9, R9
1805  	JE        gcmAesDecDone
1806  	MOVQ      R9, R11
1807  	SHLQ      $0x04, R11 // R11 = 16 * remaining bytes
1808  	LEAQ      andMask<>+0(SB), R10 // R10 is reused as the table base; the counter value is lost here
1809  	MOVOU     -16(R10)(R11*1), X12 // mask keeping the first R9 bytes
1810  	MOVOU     (DX), X0 // NOTE(review): loads 16 bytes although only R9 remain; presumably the caller guarantees readable bytes past the end of src - confirm
1811  	PAND      X12, X0 // zero everything past the remaining bytes
1812  	MOVOU     X0, X12 // keep the masked src bytes for the final XOR
1813  	PSHUFB    X15, X0
1814  	PXOR      X8, X0
1815  	MOVOU     224(DI), X8
1816  	MOVOU     240(DI), X10
1817  	MOVOU     X8, X9
1818  	PCLMULQDQ $0x00, X0, X8
1819  	PCLMULQDQ $0x11, X0, X9
1820  	PSHUFD    $0x4e, X0, X11
1821  	PXOR      X0, X11
1822  	PCLMULQDQ $0x00, X11, X10
1823  	PXOR      X8, X10
1824  	PXOR      X9, X10
1825  	MOVOU     X10, X11
1826  	PSRLDQ    $0x08, X10
1827  	PSLLDQ    $0x08, X11
1828  	PXOR      X10, X9
1829  	PXOR      X11, X8
1830  	MOVOU     X14, X11
1831  	PCLMULQDQ $0x01, X8, X11
1832  	PSHUFD    $0x4e, X8, X8
1833  	PXOR      X11, X8
1834  	MOVOU     X14, X11
1835  	PCLMULQDQ $0x01, X8, X11
1836  	PSHUFD    $0x4e, X8, X8
1837  	PXOR      X11, X8
1838  	PXOR      X9, X8 // X8 = final accumulator
1839  	MOVOU     (SP), X0 // counter block for the partial block
1840  	ADDL      $0x01, R10 // NOTE: R10 holds the andMask address here and (SP) is not read again before RET,
1841  	MOVL      R10, R11 // so this counter update (through the MOVL to 12(SP)) does not affect the output
1842  	XORL      R12, R11
1843  	BSWAPL    R11
1844  	MOVL      R11, 12(SP)
1845  	AESENC    X1, X0
1846  	AESENC    X2, X0
1847  	AESENC    X3, X0
1848  	AESENC    X4, X0
1849  	AESENC    X5, X0
1850  	AESENC    X6, X0
1851  	AESENC    X7, X0
1852  	MOVOU     128(AX), X11
1853  	AESENC    X11, X0
1854  	MOVOU     144(AX), X11
1855  	AESENC    X11, X0
1856  	MOVOU     160(AX), X11
1857  	CMPQ      R13, $0x0c
1858  	JB        decLast3
1859  	AESENC    X11, X0
1860  	MOVOU     176(AX), X11
1861  	AESENC    X11, X0
1862  	MOVOU     192(AX), X11
1863  	JE        decLast3
1864  	AESENC    X11, X0
1865  	MOVOU     208(AX), X11
1866  	AESENC    X11, X0
1867  	MOVOU     224(AX), X11
1868  
1869  decLast3:
1870  	AESENCLAST X11, X0
1871  	PXOR       X12, X0 // bytes past R9 contain keystream only, but are never stored
1872  
      // Store exactly R9 bytes to dst, lowest byte first.
1873  ptxStoreLoop:
1874  	PEXTRB $0x00, X0, (SI)
1875  	PSRLDQ $0x01, X0 // shift the next byte into position 0
1876  	LEAQ   1(SI), SI
1877  	DECQ   R9
1878  	JNE    ptxStoreLoop
1879  
1880  gcmAesDecDone:
1881  	MOVOU X8, (R8) // write the updated accumulator back to T
1882  	RET
1883