encodeblock_amd64.s raw

   1  // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
   2  
   3  //go:build !appengine && !noasm && gc && !noasm
   4  
   5  #include "textflag.h"
   6  
   7  // func _dummy_()
      //
      // _dummy_ is a no-op routine: it declares a zero-byte frame and its only
      // instruction is RET. Its body also carries a small preprocessor shim that
      // defines GOAMD64_v3 whenever GOAMD64_v4 is defined and GOAMD64_v3 is not
      // already defined. Code further down in this file tests GOAMD64_v3 (for
      // example to select TZCNTQ instead of BSFQ), so with the shim a v4 build
      // takes the same paths as a v3 build.
      //
      // NOTE(review): presumably the toolchain defines only the macro for the
      // selected GOAMD64 level, and the shim sits inside a function body because
      // the generator cannot emit file-level directives - confirm against gen.go.
   8  TEXT ·_dummy_(SB), $0
   9  #ifdef GOAMD64_v4
  10  #ifndef GOAMD64_v3
  11  #define GOAMD64_v3
  12  #endif
  13  #endif
      // No work is done at run time; return immediately.
  14  	RET
  15  
  16  // func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
  17  // Requires: BMI, SSE2
  18  TEXT ·encodeBlockAsm(SB), $24-64
  19  	MOVQ tmp+48(FP), AX
  20  	MOVQ dst_base+0(FP), CX
  21  	MOVQ $0x00000200, DX
  22  	MOVQ AX, BX
  23  	PXOR X0, X0
  24  
  25  zero_loop_encodeBlockAsm:
  26  	MOVOU X0, (BX)
  27  	MOVOU X0, 16(BX)
  28  	MOVOU X0, 32(BX)
  29  	MOVOU X0, 48(BX)
  30  	MOVOU X0, 64(BX)
  31  	MOVOU X0, 80(BX)
  32  	MOVOU X0, 96(BX)
  33  	MOVOU X0, 112(BX)
  34  	ADDQ  $0x80, BX
  35  	DECQ  DX
  36  	JNZ   zero_loop_encodeBlockAsm
  37  	MOVL  $0x00000000, 12(SP)
  38  	MOVQ  src_len+32(FP), DX
  39  	LEAQ  -9(DX), BX
  40  	LEAQ  -8(DX), SI
  41  	MOVL  SI, 8(SP)
  42  	SHRQ  $0x05, DX
  43  	SUBL  DX, BX
  44  	LEAQ  (CX)(BX*1), BX
  45  	MOVQ  BX, (SP)
  46  	MOVL  $0x00000001, DX
  47  	MOVL  DX, 16(SP)
  48  	MOVQ  src_base+24(FP), BX
  49  
  50  search_loop_encodeBlockAsm:
  51  	MOVL  DX, SI
  52  	SUBL  12(SP), SI
  53  	SHRL  $0x06, SI
  54  	LEAL  4(DX)(SI*1), SI
  55  	CMPL  SI, 8(SP)
  56  	JAE   emit_remainder_encodeBlockAsm
  57  	MOVQ  (BX)(DX*1), DI
  58  	MOVL  SI, 20(SP)
  59  	MOVQ  $0x0000cf1bbcdcbf9b, R9
  60  	MOVQ  DI, R10
  61  	MOVQ  DI, R11
  62  	SHRQ  $0x08, R11
  63  	SHLQ  $0x10, R10
  64  	IMULQ R9, R10
  65  	SHRQ  $0x32, R10
  66  	SHLQ  $0x10, R11
  67  	IMULQ R9, R11
  68  	SHRQ  $0x32, R11
  69  	MOVL  (AX)(R10*4), SI
  70  	MOVL  (AX)(R11*4), R8
  71  	MOVL  DX, (AX)(R10*4)
  72  	LEAL  1(DX), R10
  73  	MOVL  R10, (AX)(R11*4)
  74  	MOVQ  DI, R10
  75  	SHRQ  $0x10, R10
  76  	SHLQ  $0x10, R10
  77  	IMULQ R9, R10
  78  	SHRQ  $0x32, R10
  79  	MOVL  DX, R9
  80  	SUBL  16(SP), R9
  81  	MOVL  1(BX)(R9*1), R11
  82  	MOVQ  DI, R9
  83  	SHRQ  $0x08, R9
  84  	CMPL  R9, R11
  85  	JNE   no_repeat_found_encodeBlockAsm
  86  	LEAL  1(DX), DI
  87  	MOVL  12(SP), R8
  88  	MOVL  DI, SI
  89  	SUBL  16(SP), SI
  90  	JZ    repeat_extend_back_end_encodeBlockAsm
  91  
  92  repeat_extend_back_loop_encodeBlockAsm:
  93  	CMPL DI, R8
  94  	JBE  repeat_extend_back_end_encodeBlockAsm
  95  	MOVB -1(BX)(SI*1), R9
  96  	MOVB -1(BX)(DI*1), R10
  97  	CMPB R9, R10
  98  	JNE  repeat_extend_back_end_encodeBlockAsm
  99  	LEAL -1(DI), DI
 100  	DECL SI
 101  	JNZ  repeat_extend_back_loop_encodeBlockAsm
 102  
 103  repeat_extend_back_end_encodeBlockAsm:
 104  	MOVL DI, SI
 105  	SUBL 12(SP), SI
 106  	LEAQ 5(CX)(SI*1), SI
 107  	CMPQ SI, (SP)
 108  	JB   repeat_dst_size_check_encodeBlockAsm
 109  	MOVQ $0x00000000, ret+56(FP)
 110  	RET
 111  
 112  repeat_dst_size_check_encodeBlockAsm:
 113  	MOVL 12(SP), SI
 114  	CMPL SI, DI
 115  	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm
 116  	MOVL DI, R9
 117  	MOVL DI, 12(SP)
 118  	LEAQ (BX)(SI*1), R10
 119  	SUBL SI, R9
 120  	LEAL -1(R9), SI
 121  	CMPL SI, $0x3c
 122  	JB   one_byte_repeat_emit_encodeBlockAsm
 123  	CMPL SI, $0x00000100
 124  	JB   two_bytes_repeat_emit_encodeBlockAsm
 125  	CMPL SI, $0x00010000
 126  	JB   three_bytes_repeat_emit_encodeBlockAsm
 127  	CMPL SI, $0x01000000
 128  	JB   four_bytes_repeat_emit_encodeBlockAsm
 129  	MOVB $0xfc, (CX)
 130  	MOVL SI, 1(CX)
 131  	ADDQ $0x05, CX
 132  	JMP  memmove_long_repeat_emit_encodeBlockAsm
 133  
 134  four_bytes_repeat_emit_encodeBlockAsm:
 135  	MOVL SI, R11
 136  	SHRL $0x10, R11
 137  	MOVB $0xf8, (CX)
 138  	MOVW SI, 1(CX)
 139  	MOVB R11, 3(CX)
 140  	ADDQ $0x04, CX
 141  	JMP  memmove_long_repeat_emit_encodeBlockAsm
 142  
 143  three_bytes_repeat_emit_encodeBlockAsm:
 144  	MOVB $0xf4, (CX)
 145  	MOVW SI, 1(CX)
 146  	ADDQ $0x03, CX
 147  	JMP  memmove_long_repeat_emit_encodeBlockAsm
 148  
 149  two_bytes_repeat_emit_encodeBlockAsm:
 150  	MOVB $0xf0, (CX)
 151  	MOVB SI, 1(CX)
 152  	ADDQ $0x02, CX
 153  	CMPL SI, $0x40
 154  	JB   memmove_repeat_emit_encodeBlockAsm
 155  	JMP  memmove_long_repeat_emit_encodeBlockAsm
 156  
 157  one_byte_repeat_emit_encodeBlockAsm:
 158  	SHLB $0x02, SI
 159  	MOVB SI, (CX)
 160  	ADDQ $0x01, CX
 161  
 162  memmove_repeat_emit_encodeBlockAsm:
 163  	LEAQ (CX)(R9*1), SI
 164  
 165  	// genMemMoveShort
 166  	CMPQ R9, $0x08
 167  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
 168  	CMPQ R9, $0x10
 169  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
 170  	CMPQ R9, $0x20
 171  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
 172  	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
 173  
 174  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
 175  	MOVQ (R10), R11
 176  	MOVQ R11, (CX)
 177  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
 178  
 179  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
 180  	MOVQ (R10), R11
 181  	MOVQ -8(R10)(R9*1), R10
 182  	MOVQ R11, (CX)
 183  	MOVQ R10, -8(CX)(R9*1)
 184  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
 185  
 186  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
 187  	MOVOU (R10), X0
 188  	MOVOU -16(R10)(R9*1), X1
 189  	MOVOU X0, (CX)
 190  	MOVOU X1, -16(CX)(R9*1)
 191  	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm
 192  
 193  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
 194  	MOVOU (R10), X0
 195  	MOVOU 16(R10), X1
 196  	MOVOU -32(R10)(R9*1), X2
 197  	MOVOU -16(R10)(R9*1), X3
 198  	MOVOU X0, (CX)
 199  	MOVOU X1, 16(CX)
 200  	MOVOU X2, -32(CX)(R9*1)
 201  	MOVOU X3, -16(CX)(R9*1)
 202  
 203  memmove_end_copy_repeat_emit_encodeBlockAsm:
 204  	MOVQ SI, CX
 205  	JMP  emit_literal_done_repeat_emit_encodeBlockAsm
 206  
 207  memmove_long_repeat_emit_encodeBlockAsm:
 208  	LEAQ (CX)(R9*1), SI
 209  
 210  	// genMemMoveLong
 211  	MOVOU (R10), X0
 212  	MOVOU 16(R10), X1
 213  	MOVOU -32(R10)(R9*1), X2
 214  	MOVOU -16(R10)(R9*1), X3
 215  	MOVQ  R9, R12
 216  	SHRQ  $0x05, R12
 217  	MOVQ  CX, R11
 218  	ANDL  $0x0000001f, R11
 219  	MOVQ  $0x00000040, R13
 220  	SUBQ  R11, R13
 221  	DECQ  R12
 222  	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
 223  	LEAQ  -32(R10)(R13*1), R11
 224  	LEAQ  -32(CX)(R13*1), R14
 225  
 226  emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
 227  	MOVOU (R11), X4
 228  	MOVOU 16(R11), X5
 229  	MOVOA X4, (R14)
 230  	MOVOA X5, 16(R14)
 231  	ADDQ  $0x20, R14
 232  	ADDQ  $0x20, R11
 233  	ADDQ  $0x20, R13
 234  	DECQ  R12
 235  	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
 236  
 237  emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
 238  	MOVOU -32(R10)(R13*1), X4
 239  	MOVOU -16(R10)(R13*1), X5
 240  	MOVOA X4, -32(CX)(R13*1)
 241  	MOVOA X5, -16(CX)(R13*1)
 242  	ADDQ  $0x20, R13
 243  	CMPQ  R9, R13
 244  	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
 245  	MOVOU X0, (CX)
 246  	MOVOU X1, 16(CX)
 247  	MOVOU X2, -32(CX)(R9*1)
 248  	MOVOU X3, -16(CX)(R9*1)
 249  	MOVQ  SI, CX
 250  
 251  emit_literal_done_repeat_emit_encodeBlockAsm:
 252  	ADDL $0x05, DX
 253  	MOVL DX, SI
 254  	SUBL 16(SP), SI
 255  	MOVQ src_len+32(FP), R9
 256  	SUBL DX, R9
 257  	LEAQ (BX)(DX*1), R10
 258  	LEAQ (BX)(SI*1), SI
 259  
 260  	// matchLen
 261  	XORL R12, R12
 262  
 263  matchlen_loopback_16_repeat_extend_encodeBlockAsm:
 264  	CMPL R9, $0x10
 265  	JB   matchlen_match8_repeat_extend_encodeBlockAsm
 266  	MOVQ (R10)(R12*1), R11
 267  	MOVQ 8(R10)(R12*1), R13
 268  	XORQ (SI)(R12*1), R11
 269  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
 270  	XORQ 8(SI)(R12*1), R13
 271  	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm
 272  	LEAL -16(R9), R9
 273  	LEAL 16(R12), R12
 274  	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm
 275  
 276  matchlen_bsf_16repeat_extend_encodeBlockAsm:
 277  #ifdef GOAMD64_v3
 278  	TZCNTQ R13, R13
 279  
 280  #else
 281  	BSFQ R13, R13
 282  
 283  #endif
 284  	SARQ $0x03, R13
 285  	LEAL 8(R12)(R13*1), R12
 286  	JMP  repeat_extend_forward_end_encodeBlockAsm
 287  
 288  matchlen_match8_repeat_extend_encodeBlockAsm:
 289  	CMPL R9, $0x08
 290  	JB   matchlen_match4_repeat_extend_encodeBlockAsm
 291  	MOVQ (R10)(R12*1), R11
 292  	XORQ (SI)(R12*1), R11
 293  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
 294  	LEAL -8(R9), R9
 295  	LEAL 8(R12), R12
 296  	JMP  matchlen_match4_repeat_extend_encodeBlockAsm
 297  
 298  matchlen_bsf_8_repeat_extend_encodeBlockAsm:
 299  #ifdef GOAMD64_v3
 300  	TZCNTQ R11, R11
 301  
 302  #else
 303  	BSFQ R11, R11
 304  
 305  #endif
 306  	SARQ $0x03, R11
 307  	LEAL (R12)(R11*1), R12
 308  	JMP  repeat_extend_forward_end_encodeBlockAsm
 309  
 310  matchlen_match4_repeat_extend_encodeBlockAsm:
 311  	CMPL R9, $0x04
 312  	JB   matchlen_match2_repeat_extend_encodeBlockAsm
 313  	MOVL (R10)(R12*1), R11
 314  	CMPL (SI)(R12*1), R11
 315  	JNE  matchlen_match2_repeat_extend_encodeBlockAsm
 316  	LEAL -4(R9), R9
 317  	LEAL 4(R12), R12
 318  
 319  matchlen_match2_repeat_extend_encodeBlockAsm:
 320  	CMPL R9, $0x01
 321  	JE   matchlen_match1_repeat_extend_encodeBlockAsm
 322  	JB   repeat_extend_forward_end_encodeBlockAsm
 323  	MOVW (R10)(R12*1), R11
 324  	CMPW (SI)(R12*1), R11
 325  	JNE  matchlen_match1_repeat_extend_encodeBlockAsm
 326  	LEAL 2(R12), R12
 327  	SUBL $0x02, R9
 328  	JZ   repeat_extend_forward_end_encodeBlockAsm
 329  
 330  matchlen_match1_repeat_extend_encodeBlockAsm:
 331  	MOVB (R10)(R12*1), R11
 332  	CMPB (SI)(R12*1), R11
 333  	JNE  repeat_extend_forward_end_encodeBlockAsm
 334  	LEAL 1(R12), R12
 335  
 336  repeat_extend_forward_end_encodeBlockAsm:
 337  	ADDL  R12, DX
 338  	MOVL  DX, SI
 339  	SUBL  DI, SI
 340  	MOVL  16(SP), DI
 341  	TESTL R8, R8
 342  	JZ    repeat_as_copy_encodeBlockAsm
 343  
 344  	// emitRepeat
 345  emit_repeat_again_match_repeat_encodeBlockAsm:
 346  	MOVL SI, R8
 347  	LEAL -4(SI), SI
 348  	CMPL R8, $0x08
 349  	JBE  repeat_two_match_repeat_encodeBlockAsm
 350  	CMPL R8, $0x0c
 351  	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm
 352  	CMPL DI, $0x00000800
 353  	JB   repeat_two_offset_match_repeat_encodeBlockAsm
 354  
 355  cant_repeat_two_offset_match_repeat_encodeBlockAsm:
 356  	CMPL SI, $0x00000104
 357  	JB   repeat_three_match_repeat_encodeBlockAsm
 358  	CMPL SI, $0x00010100
 359  	JB   repeat_four_match_repeat_encodeBlockAsm
 360  	CMPL SI, $0x0100ffff
 361  	JB   repeat_five_match_repeat_encodeBlockAsm
 362  	LEAL -16842747(SI), SI
 363  	MOVL $0xfffb001d, (CX)
 364  	MOVB $0xff, 4(CX)
 365  	ADDQ $0x05, CX
 366  	JMP  emit_repeat_again_match_repeat_encodeBlockAsm
 367  
 368  repeat_five_match_repeat_encodeBlockAsm:
 369  	LEAL -65536(SI), SI
 370  	MOVL SI, DI
 371  	MOVW $0x001d, (CX)
 372  	MOVW SI, 2(CX)
 373  	SARL $0x10, DI
 374  	MOVB DI, 4(CX)
 375  	ADDQ $0x05, CX
 376  	JMP  repeat_end_emit_encodeBlockAsm
 377  
 378  repeat_four_match_repeat_encodeBlockAsm:
 379  	LEAL -256(SI), SI
 380  	MOVW $0x0019, (CX)
 381  	MOVW SI, 2(CX)
 382  	ADDQ $0x04, CX
 383  	JMP  repeat_end_emit_encodeBlockAsm
 384  
 385  repeat_three_match_repeat_encodeBlockAsm:
 386  	LEAL -4(SI), SI
 387  	MOVW $0x0015, (CX)
 388  	MOVB SI, 2(CX)
 389  	ADDQ $0x03, CX
 390  	JMP  repeat_end_emit_encodeBlockAsm
 391  
 392  repeat_two_match_repeat_encodeBlockAsm:
 393  	SHLL $0x02, SI
 394  	ORL  $0x01, SI
 395  	MOVW SI, (CX)
 396  	ADDQ $0x02, CX
 397  	JMP  repeat_end_emit_encodeBlockAsm
 398  
 399  repeat_two_offset_match_repeat_encodeBlockAsm:
 400  	XORQ R8, R8
 401  	LEAL 1(R8)(SI*4), SI
 402  	MOVB DI, 1(CX)
 403  	SARL $0x08, DI
 404  	SHLL $0x05, DI
 405  	ORL  DI, SI
 406  	MOVB SI, (CX)
 407  	ADDQ $0x02, CX
 408  	JMP  repeat_end_emit_encodeBlockAsm
 409  
 410  repeat_as_copy_encodeBlockAsm:
 411  	// emitCopy
 412  	CMPL DI, $0x00010000
 413  	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm
 414  	CMPL SI, $0x40
 415  	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm
 416  	MOVB $0xff, (CX)
 417  	MOVL DI, 1(CX)
 418  	LEAL -64(SI), SI
 419  	ADDQ $0x05, CX
 420  	CMPL SI, $0x04
 421  	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm
 422  
 423  	// emitRepeat
 424  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
 425  	MOVL SI, R8
 426  	LEAL -4(SI), SI
 427  	CMPL R8, $0x08
 428  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
 429  	CMPL R8, $0x0c
 430  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
 431  	CMPL DI, $0x00000800
 432  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
 433  
 434  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
 435  	CMPL SI, $0x00000104
 436  	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
 437  	CMPL SI, $0x00010100
 438  	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
 439  	CMPL SI, $0x0100ffff
 440  	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
 441  	LEAL -16842747(SI), SI
 442  	MOVL $0xfffb001d, (CX)
 443  	MOVB $0xff, 4(CX)
 444  	ADDQ $0x05, CX
 445  	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
 446  
 447  repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
 448  	LEAL -65536(SI), SI
 449  	MOVL SI, DI
 450  	MOVW $0x001d, (CX)
 451  	MOVW SI, 2(CX)
 452  	SARL $0x10, DI
 453  	MOVB DI, 4(CX)
 454  	ADDQ $0x05, CX
 455  	JMP  repeat_end_emit_encodeBlockAsm
 456  
 457  repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
 458  	LEAL -256(SI), SI
 459  	MOVW $0x0019, (CX)
 460  	MOVW SI, 2(CX)
 461  	ADDQ $0x04, CX
 462  	JMP  repeat_end_emit_encodeBlockAsm
 463  
 464  repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
 465  	LEAL -4(SI), SI
 466  	MOVW $0x0015, (CX)
 467  	MOVB SI, 2(CX)
 468  	ADDQ $0x03, CX
 469  	JMP  repeat_end_emit_encodeBlockAsm
 470  
 471  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
 472  	SHLL $0x02, SI
 473  	ORL  $0x01, SI
 474  	MOVW SI, (CX)
 475  	ADDQ $0x02, CX
 476  	JMP  repeat_end_emit_encodeBlockAsm
 477  
 478  repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
 479  	XORQ R8, R8
 480  	LEAL 1(R8)(SI*4), SI
 481  	MOVB DI, 1(CX)
 482  	SARL $0x08, DI
 483  	SHLL $0x05, DI
 484  	ORL  DI, SI
 485  	MOVB SI, (CX)
 486  	ADDQ $0x02, CX
 487  	JMP  repeat_end_emit_encodeBlockAsm
 488  
 489  four_bytes_remain_repeat_as_copy_encodeBlockAsm:
 490  	TESTL SI, SI
 491  	JZ    repeat_end_emit_encodeBlockAsm
 492  	XORL  R8, R8
 493  	LEAL  -1(R8)(SI*4), SI
 494  	MOVB  SI, (CX)
 495  	MOVL  DI, 1(CX)
 496  	ADDQ  $0x05, CX
 497  	JMP   repeat_end_emit_encodeBlockAsm
 498  
 499  two_byte_offset_repeat_as_copy_encodeBlockAsm:
 500  	CMPL SI, $0x40
 501  	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm
 502  	CMPL DI, $0x00000800
 503  	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm
 504  	MOVL $0x00000001, R8
 505  	LEAL 16(R8), R8
 506  	MOVB DI, 1(CX)
 507  	MOVL DI, R9
 508  	SHRL $0x08, R9
 509  	SHLL $0x05, R9
 510  	ORL  R9, R8
 511  	MOVB R8, (CX)
 512  	ADDQ $0x02, CX
 513  	SUBL $0x08, SI
 514  
 515  	// emitRepeat
 516  	LEAL -4(SI), SI
 517  	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 518  
 519  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 520  	MOVL SI, R8
 521  	LEAL -4(SI), SI
 522  	CMPL R8, $0x08
 523  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 524  	CMPL R8, $0x0c
 525  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 526  	CMPL DI, $0x00000800
 527  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 528  
 529  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 530  	CMPL SI, $0x00000104
 531  	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 532  	CMPL SI, $0x00010100
 533  	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 534  	CMPL SI, $0x0100ffff
 535  	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 536  	LEAL -16842747(SI), SI
 537  	MOVL $0xfffb001d, (CX)
 538  	MOVB $0xff, 4(CX)
 539  	ADDQ $0x05, CX
 540  	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 541  
 542  repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 543  	LEAL -65536(SI), SI
 544  	MOVL SI, DI
 545  	MOVW $0x001d, (CX)
 546  	MOVW SI, 2(CX)
 547  	SARL $0x10, DI
 548  	MOVB DI, 4(CX)
 549  	ADDQ $0x05, CX
 550  	JMP  repeat_end_emit_encodeBlockAsm
 551  
 552  repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 553  	LEAL -256(SI), SI
 554  	MOVW $0x0019, (CX)
 555  	MOVW SI, 2(CX)
 556  	ADDQ $0x04, CX
 557  	JMP  repeat_end_emit_encodeBlockAsm
 558  
 559  repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 560  	LEAL -4(SI), SI
 561  	MOVW $0x0015, (CX)
 562  	MOVB SI, 2(CX)
 563  	ADDQ $0x03, CX
 564  	JMP  repeat_end_emit_encodeBlockAsm
 565  
 566  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 567  	SHLL $0x02, SI
 568  	ORL  $0x01, SI
 569  	MOVW SI, (CX)
 570  	ADDQ $0x02, CX
 571  	JMP  repeat_end_emit_encodeBlockAsm
 572  
 573  repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 574  	XORQ R8, R8
 575  	LEAL 1(R8)(SI*4), SI
 576  	MOVB DI, 1(CX)
 577  	SARL $0x08, DI
 578  	SHLL $0x05, DI
 579  	ORL  DI, SI
 580  	MOVB SI, (CX)
 581  	ADDQ $0x02, CX
 582  	JMP  repeat_end_emit_encodeBlockAsm
 583  
 584  long_offset_short_repeat_as_copy_encodeBlockAsm:
 585  	MOVB $0xee, (CX)
 586  	MOVW DI, 1(CX)
 587  	LEAL -60(SI), SI
 588  	ADDQ $0x03, CX
 589  
 590  	// emitRepeat
 591  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 592  	MOVL SI, R8
 593  	LEAL -4(SI), SI
 594  	CMPL R8, $0x08
 595  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
 596  	CMPL R8, $0x0c
 597  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
 598  	CMPL DI, $0x00000800
 599  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
 600  
 601  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 602  	CMPL SI, $0x00000104
 603  	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
 604  	CMPL SI, $0x00010100
 605  	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
 606  	CMPL SI, $0x0100ffff
 607  	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
 608  	LEAL -16842747(SI), SI
 609  	MOVL $0xfffb001d, (CX)
 610  	MOVB $0xff, 4(CX)
 611  	ADDQ $0x05, CX
 612  	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
 613  
 614  repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 615  	LEAL -65536(SI), SI
 616  	MOVL SI, DI
 617  	MOVW $0x001d, (CX)
 618  	MOVW SI, 2(CX)
 619  	SARL $0x10, DI
 620  	MOVB DI, 4(CX)
 621  	ADDQ $0x05, CX
 622  	JMP  repeat_end_emit_encodeBlockAsm
 623  
 624  repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 625  	LEAL -256(SI), SI
 626  	MOVW $0x0019, (CX)
 627  	MOVW SI, 2(CX)
 628  	ADDQ $0x04, CX
 629  	JMP  repeat_end_emit_encodeBlockAsm
 630  
 631  repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 632  	LEAL -4(SI), SI
 633  	MOVW $0x0015, (CX)
 634  	MOVB SI, 2(CX)
 635  	ADDQ $0x03, CX
 636  	JMP  repeat_end_emit_encodeBlockAsm
 637  
 638  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 639  	SHLL $0x02, SI
 640  	ORL  $0x01, SI
 641  	MOVW SI, (CX)
 642  	ADDQ $0x02, CX
 643  	JMP  repeat_end_emit_encodeBlockAsm
 644  
 645  repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 646  	XORQ R8, R8
 647  	LEAL 1(R8)(SI*4), SI
 648  	MOVB DI, 1(CX)
 649  	SARL $0x08, DI
 650  	SHLL $0x05, DI
 651  	ORL  DI, SI
 652  	MOVB SI, (CX)
 653  	ADDQ $0x02, CX
 654  	JMP  repeat_end_emit_encodeBlockAsm
 655  
 656  two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
 657  	MOVL SI, R8
 658  	SHLL $0x02, R8
 659  	CMPL SI, $0x0c
 660  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
 661  	CMPL DI, $0x00000800
 662  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
 663  	LEAL -15(R8), R8
 664  	MOVB DI, 1(CX)
 665  	SHRL $0x08, DI
 666  	SHLL $0x05, DI
 667  	ORL  DI, R8
 668  	MOVB R8, (CX)
 669  	ADDQ $0x02, CX
 670  	JMP  repeat_end_emit_encodeBlockAsm
 671  
 672  emit_copy_three_repeat_as_copy_encodeBlockAsm:
 673  	LEAL -2(R8), R8
 674  	MOVB R8, (CX)
 675  	MOVW DI, 1(CX)
 676  	ADDQ $0x03, CX
 677  
 678  repeat_end_emit_encodeBlockAsm:
 679  	MOVL DX, 12(SP)
 680  	JMP  search_loop_encodeBlockAsm
 681  
 682  no_repeat_found_encodeBlockAsm:
 683  	CMPL (BX)(SI*1), DI
 684  	JEQ  candidate_match_encodeBlockAsm
 685  	SHRQ $0x08, DI
 686  	MOVL (AX)(R10*4), SI
 687  	LEAL 2(DX), R9
 688  	CMPL (BX)(R8*1), DI
 689  	JEQ  candidate2_match_encodeBlockAsm
 690  	MOVL R9, (AX)(R10*4)
 691  	SHRQ $0x08, DI
 692  	CMPL (BX)(SI*1), DI
 693  	JEQ  candidate3_match_encodeBlockAsm
 694  	MOVL 20(SP), DX
 695  	JMP  search_loop_encodeBlockAsm
 696  
 697  candidate3_match_encodeBlockAsm:
 698  	ADDL $0x02, DX
 699  	JMP  candidate_match_encodeBlockAsm
 700  
 701  candidate2_match_encodeBlockAsm:
 702  	MOVL R9, (AX)(R10*4)
 703  	INCL DX
 704  	MOVL R8, SI
 705  
 706  candidate_match_encodeBlockAsm:
 707  	MOVL  12(SP), DI
 708  	TESTL SI, SI
 709  	JZ    match_extend_back_end_encodeBlockAsm
 710  
 711  match_extend_back_loop_encodeBlockAsm:
 712  	CMPL DX, DI
 713  	JBE  match_extend_back_end_encodeBlockAsm
 714  	MOVB -1(BX)(SI*1), R8
 715  	MOVB -1(BX)(DX*1), R9
 716  	CMPB R8, R9
 717  	JNE  match_extend_back_end_encodeBlockAsm
 718  	LEAL -1(DX), DX
 719  	DECL SI
 720  	JZ   match_extend_back_end_encodeBlockAsm
 721  	JMP  match_extend_back_loop_encodeBlockAsm
 722  
 723  match_extend_back_end_encodeBlockAsm:
 724  	MOVL DX, DI
 725  	SUBL 12(SP), DI
 726  	LEAQ 5(CX)(DI*1), DI
 727  	CMPQ DI, (SP)
 728  	JB   match_dst_size_check_encodeBlockAsm
 729  	MOVQ $0x00000000, ret+56(FP)
 730  	RET
 731  
 732  match_dst_size_check_encodeBlockAsm:
 733  	MOVL DX, DI
 734  	MOVL 12(SP), R8
 735  	CMPL R8, DI
 736  	JEQ  emit_literal_done_match_emit_encodeBlockAsm
 737  	MOVL DI, R9
 738  	MOVL DI, 12(SP)
 739  	LEAQ (BX)(R8*1), DI
 740  	SUBL R8, R9
 741  	LEAL -1(R9), R8
 742  	CMPL R8, $0x3c
 743  	JB   one_byte_match_emit_encodeBlockAsm
 744  	CMPL R8, $0x00000100
 745  	JB   two_bytes_match_emit_encodeBlockAsm
 746  	CMPL R8, $0x00010000
 747  	JB   three_bytes_match_emit_encodeBlockAsm
 748  	CMPL R8, $0x01000000
 749  	JB   four_bytes_match_emit_encodeBlockAsm
 750  	MOVB $0xfc, (CX)
 751  	MOVL R8, 1(CX)
 752  	ADDQ $0x05, CX
 753  	JMP  memmove_long_match_emit_encodeBlockAsm
 754  
 755  four_bytes_match_emit_encodeBlockAsm:
 756  	MOVL R8, R10
 757  	SHRL $0x10, R10
 758  	MOVB $0xf8, (CX)
 759  	MOVW R8, 1(CX)
 760  	MOVB R10, 3(CX)
 761  	ADDQ $0x04, CX
 762  	JMP  memmove_long_match_emit_encodeBlockAsm
 763  
 764  three_bytes_match_emit_encodeBlockAsm:
 765  	MOVB $0xf4, (CX)
 766  	MOVW R8, 1(CX)
 767  	ADDQ $0x03, CX
 768  	JMP  memmove_long_match_emit_encodeBlockAsm
 769  
 770  two_bytes_match_emit_encodeBlockAsm:
 771  	MOVB $0xf0, (CX)
 772  	MOVB R8, 1(CX)
 773  	ADDQ $0x02, CX
 774  	CMPL R8, $0x40
 775  	JB   memmove_match_emit_encodeBlockAsm
 776  	JMP  memmove_long_match_emit_encodeBlockAsm
 777  
 778  one_byte_match_emit_encodeBlockAsm:
 779  	SHLB $0x02, R8
 780  	MOVB R8, (CX)
 781  	ADDQ $0x01, CX
 782  
 783  memmove_match_emit_encodeBlockAsm:
 784  	LEAQ (CX)(R9*1), R8
 785  
 786  	// genMemMoveShort
 787  	CMPQ R9, $0x08
 788  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
 789  	CMPQ R9, $0x10
 790  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
 791  	CMPQ R9, $0x20
 792  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
 793  	JMP  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
 794  
 795  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
 796  	MOVQ (DI), R10
 797  	MOVQ R10, (CX)
 798  	JMP  memmove_end_copy_match_emit_encodeBlockAsm
 799  
 800  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
 801  	MOVQ (DI), R10
 802  	MOVQ -8(DI)(R9*1), DI
 803  	MOVQ R10, (CX)
 804  	MOVQ DI, -8(CX)(R9*1)
 805  	JMP  memmove_end_copy_match_emit_encodeBlockAsm
 806  
 807  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
 808  	MOVOU (DI), X0
 809  	MOVOU -16(DI)(R9*1), X1
 810  	MOVOU X0, (CX)
 811  	MOVOU X1, -16(CX)(R9*1)
 812  	JMP   memmove_end_copy_match_emit_encodeBlockAsm
 813  
 814  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
 815  	MOVOU (DI), X0
 816  	MOVOU 16(DI), X1
 817  	MOVOU -32(DI)(R9*1), X2
 818  	MOVOU -16(DI)(R9*1), X3
 819  	MOVOU X0, (CX)
 820  	MOVOU X1, 16(CX)
 821  	MOVOU X2, -32(CX)(R9*1)
 822  	MOVOU X3, -16(CX)(R9*1)
 823  
 824  memmove_end_copy_match_emit_encodeBlockAsm:
 825  	MOVQ R8, CX
 826  	JMP  emit_literal_done_match_emit_encodeBlockAsm
 827  
 828  memmove_long_match_emit_encodeBlockAsm:
 829  	LEAQ (CX)(R9*1), R8
 830  
 831  	// genMemMoveLong
 832  	MOVOU (DI), X0
 833  	MOVOU 16(DI), X1
 834  	MOVOU -32(DI)(R9*1), X2
 835  	MOVOU -16(DI)(R9*1), X3
 836  	MOVQ  R9, R11
 837  	SHRQ  $0x05, R11
 838  	MOVQ  CX, R10
 839  	ANDL  $0x0000001f, R10
 840  	MOVQ  $0x00000040, R12
 841  	SUBQ  R10, R12
 842  	DECQ  R11
 843  	JA    emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
 844  	LEAQ  -32(DI)(R12*1), R10
 845  	LEAQ  -32(CX)(R12*1), R13
 846  
 847  emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
 848  	MOVOU (R10), X4
 849  	MOVOU 16(R10), X5
 850  	MOVOA X4, (R13)
 851  	MOVOA X5, 16(R13)
 852  	ADDQ  $0x20, R13
 853  	ADDQ  $0x20, R10
 854  	ADDQ  $0x20, R12
 855  	DECQ  R11
 856  	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
 857  
 858  emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
 859  	MOVOU -32(DI)(R12*1), X4
 860  	MOVOU -16(DI)(R12*1), X5
 861  	MOVOA X4, -32(CX)(R12*1)
 862  	MOVOA X5, -16(CX)(R12*1)
 863  	ADDQ  $0x20, R12
 864  	CMPQ  R9, R12
 865  	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
 866  	MOVOU X0, (CX)
 867  	MOVOU X1, 16(CX)
 868  	MOVOU X2, -32(CX)(R9*1)
 869  	MOVOU X3, -16(CX)(R9*1)
 870  	MOVQ  R8, CX
 871  
 872  emit_literal_done_match_emit_encodeBlockAsm:
 873  match_nolit_loop_encodeBlockAsm:
 874  	MOVL DX, DI
 875  	SUBL SI, DI
 876  	MOVL DI, 16(SP)
 877  	ADDL $0x04, DX
 878  	ADDL $0x04, SI
 879  	MOVQ src_len+32(FP), DI
 880  	SUBL DX, DI
 881  	LEAQ (BX)(DX*1), R8
 882  	LEAQ (BX)(SI*1), SI
 883  
 884  	// matchLen
 885  	XORL R10, R10
 886  
 887  matchlen_loopback_16_match_nolit_encodeBlockAsm:
 888  	CMPL DI, $0x10
 889  	JB   matchlen_match8_match_nolit_encodeBlockAsm
 890  	MOVQ (R8)(R10*1), R9
 891  	MOVQ 8(R8)(R10*1), R11
 892  	XORQ (SI)(R10*1), R9
 893  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
 894  	XORQ 8(SI)(R10*1), R11
 895  	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm
 896  	LEAL -16(DI), DI
 897  	LEAL 16(R10), R10
 898  	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm
 899  
 900  matchlen_bsf_16match_nolit_encodeBlockAsm:
 901  #ifdef GOAMD64_v3
 902  	TZCNTQ R11, R11
 903  
 904  #else
 905  	BSFQ R11, R11
 906  
 907  #endif
 908  	SARQ $0x03, R11
 909  	LEAL 8(R10)(R11*1), R10
 910  	JMP  match_nolit_end_encodeBlockAsm
 911  
 912  matchlen_match8_match_nolit_encodeBlockAsm:
 913  	CMPL DI, $0x08
 914  	JB   matchlen_match4_match_nolit_encodeBlockAsm
 915  	MOVQ (R8)(R10*1), R9
 916  	XORQ (SI)(R10*1), R9
 917  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
 918  	LEAL -8(DI), DI
 919  	LEAL 8(R10), R10
 920  	JMP  matchlen_match4_match_nolit_encodeBlockAsm
 921  
 922  matchlen_bsf_8_match_nolit_encodeBlockAsm:
 923  #ifdef GOAMD64_v3
 924  	TZCNTQ R9, R9
 925  
 926  #else
 927  	BSFQ R9, R9
 928  
 929  #endif
 930  	SARQ $0x03, R9
 931  	LEAL (R10)(R9*1), R10
 932  	JMP  match_nolit_end_encodeBlockAsm
 933  
 934  matchlen_match4_match_nolit_encodeBlockAsm:
 935  	CMPL DI, $0x04
 936  	JB   matchlen_match2_match_nolit_encodeBlockAsm
 937  	MOVL (R8)(R10*1), R9
 938  	CMPL (SI)(R10*1), R9
 939  	JNE  matchlen_match2_match_nolit_encodeBlockAsm
 940  	LEAL -4(DI), DI
 941  	LEAL 4(R10), R10
 942  
 943  matchlen_match2_match_nolit_encodeBlockAsm:
 944  	CMPL DI, $0x01
 945  	JE   matchlen_match1_match_nolit_encodeBlockAsm
 946  	JB   match_nolit_end_encodeBlockAsm
 947  	MOVW (R8)(R10*1), R9
 948  	CMPW (SI)(R10*1), R9
 949  	JNE  matchlen_match1_match_nolit_encodeBlockAsm
 950  	LEAL 2(R10), R10
 951  	SUBL $0x02, DI
 952  	JZ   match_nolit_end_encodeBlockAsm
 953  
 954  matchlen_match1_match_nolit_encodeBlockAsm:
 955  	MOVB (R8)(R10*1), R9
 956  	CMPB (SI)(R10*1), R9
 957  	JNE  match_nolit_end_encodeBlockAsm
 958  	LEAL 1(R10), R10
 959  
 960  match_nolit_end_encodeBlockAsm:
 961  	ADDL R10, DX
 962  	MOVL 16(SP), SI
 963  	ADDL $0x04, R10
 964  	MOVL DX, 12(SP)
 965  
 966  	// emitCopy
 967  	CMPL SI, $0x00010000
 968  	JB   two_byte_offset_match_nolit_encodeBlockAsm
 969  	CMPL R10, $0x40
 970  	JBE  four_bytes_remain_match_nolit_encodeBlockAsm
 971  	MOVB $0xff, (CX)
 972  	MOVL SI, 1(CX)
 973  	LEAL -64(R10), R10
 974  	ADDQ $0x05, CX
 975  	CMPL R10, $0x04
 976  	JB   four_bytes_remain_match_nolit_encodeBlockAsm
 977  
 978  	// emitRepeat
 979  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
 980  	MOVL R10, DI
 981  	LEAL -4(R10), R10
 982  	CMPL DI, $0x08
 983  	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy
 984  	CMPL DI, $0x0c
 985  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
 986  	CMPL SI, $0x00000800
 987  	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
 988  
 989  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
 990  	CMPL R10, $0x00000104
 991  	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy
 992  	CMPL R10, $0x00010100
 993  	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy
 994  	CMPL R10, $0x0100ffff
 995  	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy
 996  	LEAL -16842747(R10), R10
 997  	MOVL $0xfffb001d, (CX)
 998  	MOVB $0xff, 4(CX)
 999  	ADDQ $0x05, CX
1000  	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
1001  
1002  repeat_five_match_nolit_encodeBlockAsm_emit_copy:
1003  	LEAL -65536(R10), R10
1004  	MOVL R10, SI
1005  	MOVW $0x001d, (CX)
1006  	MOVW R10, 2(CX)
1007  	SARL $0x10, SI
1008  	MOVB SI, 4(CX)
1009  	ADDQ $0x05, CX
1010  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1011  
1012  repeat_four_match_nolit_encodeBlockAsm_emit_copy:
1013  	LEAL -256(R10), R10
1014  	MOVW $0x0019, (CX)
1015  	MOVW R10, 2(CX)
1016  	ADDQ $0x04, CX
1017  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1018  
1019  repeat_three_match_nolit_encodeBlockAsm_emit_copy:
1020  	LEAL -4(R10), R10
1021  	MOVW $0x0015, (CX)
1022  	MOVB R10, 2(CX)
1023  	ADDQ $0x03, CX
1024  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1025  
1026  repeat_two_match_nolit_encodeBlockAsm_emit_copy:
1027  	SHLL $0x02, R10
1028  	ORL  $0x01, R10
1029  	MOVW R10, (CX)
1030  	ADDQ $0x02, CX
1031  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1032  
1033  repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
1034  	XORQ DI, DI
1035  	LEAL 1(DI)(R10*4), R10
1036  	MOVB SI, 1(CX)
1037  	SARL $0x08, SI
1038  	SHLL $0x05, SI
1039  	ORL  SI, R10
1040  	MOVB R10, (CX)
1041  	ADDQ $0x02, CX
1042  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1043  
1044  four_bytes_remain_match_nolit_encodeBlockAsm:
1045  	TESTL R10, R10
1046  	JZ    match_nolit_emitcopy_end_encodeBlockAsm
1047  	XORL  DI, DI
1048  	LEAL  -1(DI)(R10*4), R10
1049  	MOVB  R10, (CX)
1050  	MOVL  SI, 1(CX)
1051  	ADDQ  $0x05, CX
1052  	JMP   match_nolit_emitcopy_end_encodeBlockAsm
1053  
1054  two_byte_offset_match_nolit_encodeBlockAsm:
1055  	CMPL R10, $0x40
1056  	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm
1057  	CMPL SI, $0x00000800
1058  	JAE  long_offset_short_match_nolit_encodeBlockAsm
1059  	MOVL $0x00000001, DI
1060  	LEAL 16(DI), DI
1061  	MOVB SI, 1(CX)
1062  	MOVL SI, R8
1063  	SHRL $0x08, R8
1064  	SHLL $0x05, R8
1065  	ORL  R8, DI
1066  	MOVB DI, (CX)
1067  	ADDQ $0x02, CX
1068  	SUBL $0x08, R10
1069  
1070  	// emitRepeat
1071  	LEAL -4(R10), R10
1072  	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
1073  
1074  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b:
1075  	MOVL R10, DI
1076  	LEAL -4(R10), R10
1077  	CMPL DI, $0x08
1078  	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b
1079  	CMPL DI, $0x0c
1080  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
1081  	CMPL SI, $0x00000800
1082  	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
1083  
1084  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
1085  	CMPL R10, $0x00000104
1086  	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b
1087  	CMPL R10, $0x00010100
1088  	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b
1089  	CMPL R10, $0x0100ffff
1090  	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b
1091  	LEAL -16842747(R10), R10
1092  	MOVL $0xfffb001d, (CX)
1093  	MOVB $0xff, 4(CX)
1094  	ADDQ $0x05, CX
1095  	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b
1096  
1097  repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b:
1098  	LEAL -65536(R10), R10
1099  	MOVL R10, SI
1100  	MOVW $0x001d, (CX)
1101  	MOVW R10, 2(CX)
1102  	SARL $0x10, SI
1103  	MOVB SI, 4(CX)
1104  	ADDQ $0x05, CX
1105  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1106  
1107  repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b:
1108  	LEAL -256(R10), R10
1109  	MOVW $0x0019, (CX)
1110  	MOVW R10, 2(CX)
1111  	ADDQ $0x04, CX
1112  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1113  
1114  repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b:
1115  	LEAL -4(R10), R10
1116  	MOVW $0x0015, (CX)
1117  	MOVB R10, 2(CX)
1118  	ADDQ $0x03, CX
1119  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1120  
1121  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b:
1122  	SHLL $0x02, R10
1123  	ORL  $0x01, R10
1124  	MOVW R10, (CX)
1125  	ADDQ $0x02, CX
1126  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1127  
1128  repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
1129  	XORQ DI, DI
1130  	LEAL 1(DI)(R10*4), R10
1131  	MOVB SI, 1(CX)
1132  	SARL $0x08, SI
1133  	SHLL $0x05, SI
1134  	ORL  SI, R10
1135  	MOVB R10, (CX)
1136  	ADDQ $0x02, CX
1137  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1138  
1139  long_offset_short_match_nolit_encodeBlockAsm:
1140  	MOVB $0xee, (CX)
1141  	MOVW SI, 1(CX)
1142  	LEAL -60(R10), R10
1143  	ADDQ $0x03, CX
1144  
1145  	// emitRepeat
1146  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
1147  	MOVL R10, DI
1148  	LEAL -4(R10), R10
1149  	CMPL DI, $0x08
1150  	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
1151  	CMPL DI, $0x0c
1152  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
1153  	CMPL SI, $0x00000800
1154  	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
1155  
1156  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
1157  	CMPL R10, $0x00000104
1158  	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
1159  	CMPL R10, $0x00010100
1160  	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
1161  	CMPL R10, $0x0100ffff
1162  	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
1163  	LEAL -16842747(R10), R10
1164  	MOVL $0xfffb001d, (CX)
1165  	MOVB $0xff, 4(CX)
1166  	ADDQ $0x05, CX
1167  	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
1168  
1169  repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
1170  	LEAL -65536(R10), R10
1171  	MOVL R10, SI
1172  	MOVW $0x001d, (CX)
1173  	MOVW R10, 2(CX)
1174  	SARL $0x10, SI
1175  	MOVB SI, 4(CX)
1176  	ADDQ $0x05, CX
1177  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1178  
1179  repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
1180  	LEAL -256(R10), R10
1181  	MOVW $0x0019, (CX)
1182  	MOVW R10, 2(CX)
1183  	ADDQ $0x04, CX
1184  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1185  
1186  repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
1187  	LEAL -4(R10), R10
1188  	MOVW $0x0015, (CX)
1189  	MOVB R10, 2(CX)
1190  	ADDQ $0x03, CX
1191  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1192  
1193  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
1194  	SHLL $0x02, R10
1195  	ORL  $0x01, R10
1196  	MOVW R10, (CX)
1197  	ADDQ $0x02, CX
1198  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1199  
1200  repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
1201  	XORQ DI, DI
1202  	LEAL 1(DI)(R10*4), R10
1203  	MOVB SI, 1(CX)
1204  	SARL $0x08, SI
1205  	SHLL $0x05, SI
1206  	ORL  SI, R10
1207  	MOVB R10, (CX)
1208  	ADDQ $0x02, CX
1209  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1210  
1211  two_byte_offset_short_match_nolit_encodeBlockAsm:
1212  	MOVL R10, DI
1213  	SHLL $0x02, DI
1214  	CMPL R10, $0x0c
1215  	JAE  emit_copy_three_match_nolit_encodeBlockAsm
1216  	CMPL SI, $0x00000800
1217  	JAE  emit_copy_three_match_nolit_encodeBlockAsm
1218  	LEAL -15(DI), DI
1219  	MOVB SI, 1(CX)
1220  	SHRL $0x08, SI
1221  	SHLL $0x05, SI
1222  	ORL  SI, DI
1223  	MOVB DI, (CX)
1224  	ADDQ $0x02, CX
1225  	JMP  match_nolit_emitcopy_end_encodeBlockAsm
1226  
1227  emit_copy_three_match_nolit_encodeBlockAsm:
1228  	LEAL -2(DI), DI
1229  	MOVB DI, (CX)
1230  	MOVW SI, 1(CX)
1231  	ADDQ $0x03, CX
1232  
1233  match_nolit_emitcopy_end_encodeBlockAsm:
1234  	CMPL DX, 8(SP)
1235  	JAE  emit_remainder_encodeBlockAsm
1236  	MOVQ -2(BX)(DX*1), DI
1237  	CMPQ CX, (SP)
1238  	JB   match_nolit_dst_ok_encodeBlockAsm
1239  	MOVQ $0x00000000, ret+56(FP)
1240  	RET
1241  
1242  match_nolit_dst_ok_encodeBlockAsm:
1243  	MOVQ  $0x0000cf1bbcdcbf9b, R9
1244  	MOVQ  DI, R8
1245  	SHRQ  $0x10, DI
1246  	MOVQ  DI, SI
1247  	SHLQ  $0x10, R8
1248  	IMULQ R9, R8
1249  	SHRQ  $0x32, R8
1250  	SHLQ  $0x10, SI
1251  	IMULQ R9, SI
1252  	SHRQ  $0x32, SI
1253  	LEAL  -2(DX), R9
1254  	LEAQ  (AX)(SI*4), R10
1255  	MOVL  (R10), SI
1256  	MOVL  R9, (AX)(R8*4)
1257  	MOVL  DX, (R10)
1258  	CMPL  (BX)(SI*1), DI
1259  	JEQ   match_nolit_loop_encodeBlockAsm
1260  	INCL  DX
1261  	JMP   search_loop_encodeBlockAsm
1262  
1263  emit_remainder_encodeBlockAsm:
1264  	MOVQ src_len+32(FP), AX
1265  	SUBL 12(SP), AX
1266  	LEAQ 5(CX)(AX*1), AX
1267  	CMPQ AX, (SP)
1268  	JB   emit_remainder_ok_encodeBlockAsm
1269  	MOVQ $0x00000000, ret+56(FP)
1270  	RET
1271  
1272  emit_remainder_ok_encodeBlockAsm:
1273  	MOVQ src_len+32(FP), AX
1274  	MOVL 12(SP), DX
1275  	CMPL DX, AX
1276  	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm
1277  	MOVL AX, SI
1278  	MOVL AX, 12(SP)
1279  	LEAQ (BX)(DX*1), AX
1280  	SUBL DX, SI
1281  	LEAL -1(SI), DX
1282  	CMPL DX, $0x3c
1283  	JB   one_byte_emit_remainder_encodeBlockAsm
1284  	CMPL DX, $0x00000100
1285  	JB   two_bytes_emit_remainder_encodeBlockAsm
1286  	CMPL DX, $0x00010000
1287  	JB   three_bytes_emit_remainder_encodeBlockAsm
1288  	CMPL DX, $0x01000000
1289  	JB   four_bytes_emit_remainder_encodeBlockAsm
1290  	MOVB $0xfc, (CX)
1291  	MOVL DX, 1(CX)
1292  	ADDQ $0x05, CX
1293  	JMP  memmove_long_emit_remainder_encodeBlockAsm
1294  
1295  four_bytes_emit_remainder_encodeBlockAsm:
1296  	MOVL DX, BX
1297  	SHRL $0x10, BX
1298  	MOVB $0xf8, (CX)
1299  	MOVW DX, 1(CX)
1300  	MOVB BL, 3(CX)
1301  	ADDQ $0x04, CX
1302  	JMP  memmove_long_emit_remainder_encodeBlockAsm
1303  
1304  three_bytes_emit_remainder_encodeBlockAsm:
1305  	MOVB $0xf4, (CX)
1306  	MOVW DX, 1(CX)
1307  	ADDQ $0x03, CX
1308  	JMP  memmove_long_emit_remainder_encodeBlockAsm
1309  
1310  two_bytes_emit_remainder_encodeBlockAsm:
1311  	MOVB $0xf0, (CX)
1312  	MOVB DL, 1(CX)
1313  	ADDQ $0x02, CX
1314  	CMPL DX, $0x40
1315  	JB   memmove_emit_remainder_encodeBlockAsm
1316  	JMP  memmove_long_emit_remainder_encodeBlockAsm
1317  
1318  one_byte_emit_remainder_encodeBlockAsm:
1319  	SHLB $0x02, DL
1320  	MOVB DL, (CX)
1321  	ADDQ $0x01, CX
1322  
1323  memmove_emit_remainder_encodeBlockAsm:
1324  	LEAQ (CX)(SI*1), DX
1325  	MOVL SI, BX
1326  
1327  	// genMemMoveShort
1328  	CMPQ BX, $0x03
1329  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
1330  	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
1331  	CMPQ BX, $0x08
1332  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
1333  	CMPQ BX, $0x10
1334  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
1335  	CMPQ BX, $0x20
1336  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
1337  	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
1338  
1339  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
1340  	MOVB (AX), SI
1341  	MOVB -1(AX)(BX*1), AL
1342  	MOVB SI, (CX)
1343  	MOVB AL, -1(CX)(BX*1)
1344  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
1345  
1346  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
1347  	MOVW (AX), SI
1348  	MOVB 2(AX), AL
1349  	MOVW SI, (CX)
1350  	MOVB AL, 2(CX)
1351  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
1352  
1353  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
1354  	MOVL (AX), SI
1355  	MOVL -4(AX)(BX*1), AX
1356  	MOVL SI, (CX)
1357  	MOVL AX, -4(CX)(BX*1)
1358  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
1359  
1360  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
1361  	MOVQ (AX), SI
1362  	MOVQ -8(AX)(BX*1), AX
1363  	MOVQ SI, (CX)
1364  	MOVQ AX, -8(CX)(BX*1)
1365  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
1366  
1367  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
1368  	MOVOU (AX), X0
1369  	MOVOU -16(AX)(BX*1), X1
1370  	MOVOU X0, (CX)
1371  	MOVOU X1, -16(CX)(BX*1)
1372  	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm
1373  
1374  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
1375  	MOVOU (AX), X0
1376  	MOVOU 16(AX), X1
1377  	MOVOU -32(AX)(BX*1), X2
1378  	MOVOU -16(AX)(BX*1), X3
1379  	MOVOU X0, (CX)
1380  	MOVOU X1, 16(CX)
1381  	MOVOU X2, -32(CX)(BX*1)
1382  	MOVOU X3, -16(CX)(BX*1)
1383  
1384  memmove_end_copy_emit_remainder_encodeBlockAsm:
1385  	MOVQ DX, CX
1386  	JMP  emit_literal_done_emit_remainder_encodeBlockAsm
1387  
1388  memmove_long_emit_remainder_encodeBlockAsm:
1389  	LEAQ (CX)(SI*1), DX
1390  	MOVL SI, BX
1391  
1392  	// genMemMoveLong
1393  	MOVOU (AX), X0
1394  	MOVOU 16(AX), X1
1395  	MOVOU -32(AX)(BX*1), X2
1396  	MOVOU -16(AX)(BX*1), X3
1397  	MOVQ  BX, DI
1398  	SHRQ  $0x05, DI
1399  	MOVQ  CX, SI
1400  	ANDL  $0x0000001f, SI
1401  	MOVQ  $0x00000040, R8
1402  	SUBQ  SI, R8
1403  	DECQ  DI
1404  	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
1405  	LEAQ  -32(AX)(R8*1), SI
1406  	LEAQ  -32(CX)(R8*1), R9
1407  
1408  emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
1409  	MOVOU (SI), X4
1410  	MOVOU 16(SI), X5
1411  	MOVOA X4, (R9)
1412  	MOVOA X5, 16(R9)
1413  	ADDQ  $0x20, R9
1414  	ADDQ  $0x20, SI
1415  	ADDQ  $0x20, R8
1416  	DECQ  DI
1417  	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
1418  
1419  emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
1420  	MOVOU -32(AX)(R8*1), X4
1421  	MOVOU -16(AX)(R8*1), X5
1422  	MOVOA X4, -32(CX)(R8*1)
1423  	MOVOA X5, -16(CX)(R8*1)
1424  	ADDQ  $0x20, R8
1425  	CMPQ  BX, R8
1426  	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
1427  	MOVOU X0, (CX)
1428  	MOVOU X1, 16(CX)
1429  	MOVOU X2, -32(CX)(BX*1)
1430  	MOVOU X3, -16(CX)(BX*1)
1431  	MOVQ  DX, CX
1432  
1433  emit_literal_done_emit_remainder_encodeBlockAsm:
1434  	MOVQ dst_base+0(FP), AX
1435  	SUBQ AX, CX
1436  	MOVQ CX, ret+56(FP)
1437  	RET
1438  
1439  // func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int
1440  // Requires: BMI, SSE2
1441  TEXT ·encodeBlockAsm4MB(SB), $24-64
1442  	MOVQ tmp+48(FP), AX
1443  	MOVQ dst_base+0(FP), CX
1444  	MOVQ $0x00000200, DX
1445  	MOVQ AX, BX
1446  	PXOR X0, X0
1447  
1448  zero_loop_encodeBlockAsm4MB:
1449  	MOVOU X0, (BX)
1450  	MOVOU X0, 16(BX)
1451  	MOVOU X0, 32(BX)
1452  	MOVOU X0, 48(BX)
1453  	MOVOU X0, 64(BX)
1454  	MOVOU X0, 80(BX)
1455  	MOVOU X0, 96(BX)
1456  	MOVOU X0, 112(BX)
1457  	ADDQ  $0x80, BX
1458  	DECQ  DX
1459  	JNZ   zero_loop_encodeBlockAsm4MB
1460  	MOVL  $0x00000000, 12(SP)
1461  	MOVQ  src_len+32(FP), DX
1462  	LEAQ  -9(DX), BX
1463  	LEAQ  -8(DX), SI
1464  	MOVL  SI, 8(SP)
1465  	SHRQ  $0x05, DX
1466  	SUBL  DX, BX
1467  	LEAQ  (CX)(BX*1), BX
1468  	MOVQ  BX, (SP)
1469  	MOVL  $0x00000001, DX
1470  	MOVL  DX, 16(SP)
1471  	MOVQ  src_base+24(FP), BX
1472  
1473  search_loop_encodeBlockAsm4MB:
1474  	MOVL  DX, SI
1475  	SUBL  12(SP), SI
1476  	SHRL  $0x06, SI
1477  	LEAL  4(DX)(SI*1), SI
1478  	CMPL  SI, 8(SP)
1479  	JAE   emit_remainder_encodeBlockAsm4MB
1480  	MOVQ  (BX)(DX*1), DI
1481  	MOVL  SI, 20(SP)
1482  	MOVQ  $0x0000cf1bbcdcbf9b, R9
1483  	MOVQ  DI, R10
1484  	MOVQ  DI, R11
1485  	SHRQ  $0x08, R11
1486  	SHLQ  $0x10, R10
1487  	IMULQ R9, R10
1488  	SHRQ  $0x32, R10
1489  	SHLQ  $0x10, R11
1490  	IMULQ R9, R11
1491  	SHRQ  $0x32, R11
1492  	MOVL  (AX)(R10*4), SI
1493  	MOVL  (AX)(R11*4), R8
1494  	MOVL  DX, (AX)(R10*4)
1495  	LEAL  1(DX), R10
1496  	MOVL  R10, (AX)(R11*4)
1497  	MOVQ  DI, R10
1498  	SHRQ  $0x10, R10
1499  	SHLQ  $0x10, R10
1500  	IMULQ R9, R10
1501  	SHRQ  $0x32, R10
1502  	MOVL  DX, R9
1503  	SUBL  16(SP), R9
1504  	MOVL  1(BX)(R9*1), R11
1505  	MOVQ  DI, R9
1506  	SHRQ  $0x08, R9
1507  	CMPL  R9, R11
1508  	JNE   no_repeat_found_encodeBlockAsm4MB
1509  	LEAL  1(DX), DI
1510  	MOVL  12(SP), R8
1511  	MOVL  DI, SI
1512  	SUBL  16(SP), SI
1513  	JZ    repeat_extend_back_end_encodeBlockAsm4MB
1514  
1515  repeat_extend_back_loop_encodeBlockAsm4MB:
1516  	CMPL DI, R8
1517  	JBE  repeat_extend_back_end_encodeBlockAsm4MB
1518  	MOVB -1(BX)(SI*1), R9
1519  	MOVB -1(BX)(DI*1), R10
1520  	CMPB R9, R10
1521  	JNE  repeat_extend_back_end_encodeBlockAsm4MB
1522  	LEAL -1(DI), DI
1523  	DECL SI
1524  	JNZ  repeat_extend_back_loop_encodeBlockAsm4MB
1525  
1526  repeat_extend_back_end_encodeBlockAsm4MB:
1527  	MOVL DI, SI
1528  	SUBL 12(SP), SI
1529  	LEAQ 4(CX)(SI*1), SI
1530  	CMPQ SI, (SP)
1531  	JB   repeat_dst_size_check_encodeBlockAsm4MB
1532  	MOVQ $0x00000000, ret+56(FP)
1533  	RET
1534  
1535  repeat_dst_size_check_encodeBlockAsm4MB:
1536  	MOVL 12(SP), SI
1537  	CMPL SI, DI
1538  	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm4MB
1539  	MOVL DI, R9
1540  	MOVL DI, 12(SP)
1541  	LEAQ (BX)(SI*1), R10
1542  	SUBL SI, R9
1543  	LEAL -1(R9), SI
1544  	CMPL SI, $0x3c
1545  	JB   one_byte_repeat_emit_encodeBlockAsm4MB
1546  	CMPL SI, $0x00000100
1547  	JB   two_bytes_repeat_emit_encodeBlockAsm4MB
1548  	CMPL SI, $0x00010000
1549  	JB   three_bytes_repeat_emit_encodeBlockAsm4MB
1550  	MOVL SI, R11
1551  	SHRL $0x10, R11
1552  	MOVB $0xf8, (CX)
1553  	MOVW SI, 1(CX)
1554  	MOVB R11, 3(CX)
1555  	ADDQ $0x04, CX
1556  	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
1557  
1558  three_bytes_repeat_emit_encodeBlockAsm4MB:
1559  	MOVB $0xf4, (CX)
1560  	MOVW SI, 1(CX)
1561  	ADDQ $0x03, CX
1562  	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
1563  
1564  two_bytes_repeat_emit_encodeBlockAsm4MB:
1565  	MOVB $0xf0, (CX)
1566  	MOVB SI, 1(CX)
1567  	ADDQ $0x02, CX
1568  	CMPL SI, $0x40
1569  	JB   memmove_repeat_emit_encodeBlockAsm4MB
1570  	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
1571  
1572  one_byte_repeat_emit_encodeBlockAsm4MB:
1573  	SHLB $0x02, SI
1574  	MOVB SI, (CX)
1575  	ADDQ $0x01, CX
1576  
1577  memmove_repeat_emit_encodeBlockAsm4MB:
1578  	LEAQ (CX)(R9*1), SI
1579  
1580  	// genMemMoveShort
1581  	CMPQ R9, $0x08
1582  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
1583  	CMPQ R9, $0x10
1584  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
1585  	CMPQ R9, $0x20
1586  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
1587  	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
1588  
1589  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
1590  	MOVQ (R10), R11
1591  	MOVQ R11, (CX)
1592  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
1593  
1594  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
1595  	MOVQ (R10), R11
1596  	MOVQ -8(R10)(R9*1), R10
1597  	MOVQ R11, (CX)
1598  	MOVQ R10, -8(CX)(R9*1)
1599  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
1600  
1601  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
1602  	MOVOU (R10), X0
1603  	MOVOU -16(R10)(R9*1), X1
1604  	MOVOU X0, (CX)
1605  	MOVOU X1, -16(CX)(R9*1)
1606  	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm4MB
1607  
1608  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
1609  	MOVOU (R10), X0
1610  	MOVOU 16(R10), X1
1611  	MOVOU -32(R10)(R9*1), X2
1612  	MOVOU -16(R10)(R9*1), X3
1613  	MOVOU X0, (CX)
1614  	MOVOU X1, 16(CX)
1615  	MOVOU X2, -32(CX)(R9*1)
1616  	MOVOU X3, -16(CX)(R9*1)
1617  
1618  memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
1619  	MOVQ SI, CX
1620  	JMP  emit_literal_done_repeat_emit_encodeBlockAsm4MB
1621  
1622  memmove_long_repeat_emit_encodeBlockAsm4MB:
1623  	LEAQ (CX)(R9*1), SI
1624  
1625  	// genMemMoveLong
1626  	MOVOU (R10), X0
1627  	MOVOU 16(R10), X1
1628  	MOVOU -32(R10)(R9*1), X2
1629  	MOVOU -16(R10)(R9*1), X3
1630  	MOVQ  R9, R12
1631  	SHRQ  $0x05, R12
1632  	MOVQ  CX, R11
1633  	ANDL  $0x0000001f, R11
1634  	MOVQ  $0x00000040, R13
1635  	SUBQ  R11, R13
1636  	DECQ  R12
1637  	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
1638  	LEAQ  -32(R10)(R13*1), R11
1639  	LEAQ  -32(CX)(R13*1), R14
1640  
1641  emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
1642  	MOVOU (R11), X4
1643  	MOVOU 16(R11), X5
1644  	MOVOA X4, (R14)
1645  	MOVOA X5, 16(R14)
1646  	ADDQ  $0x20, R14
1647  	ADDQ  $0x20, R11
1648  	ADDQ  $0x20, R13
1649  	DECQ  R12
1650  	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
1651  
1652  emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
1653  	MOVOU -32(R10)(R13*1), X4
1654  	MOVOU -16(R10)(R13*1), X5
1655  	MOVOA X4, -32(CX)(R13*1)
1656  	MOVOA X5, -16(CX)(R13*1)
1657  	ADDQ  $0x20, R13
1658  	CMPQ  R9, R13
1659  	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
1660  	MOVOU X0, (CX)
1661  	MOVOU X1, 16(CX)
1662  	MOVOU X2, -32(CX)(R9*1)
1663  	MOVOU X3, -16(CX)(R9*1)
1664  	MOVQ  SI, CX
1665  
1666  emit_literal_done_repeat_emit_encodeBlockAsm4MB:
1667  	ADDL $0x05, DX
1668  	MOVL DX, SI
1669  	SUBL 16(SP), SI
1670  	MOVQ src_len+32(FP), R9
1671  	SUBL DX, R9
1672  	LEAQ (BX)(DX*1), R10
1673  	LEAQ (BX)(SI*1), SI
1674  
1675  	// matchLen
1676  	XORL R12, R12
1677  
1678  matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB:
1679  	CMPL R9, $0x10
1680  	JB   matchlen_match8_repeat_extend_encodeBlockAsm4MB
1681  	MOVQ (R10)(R12*1), R11
1682  	MOVQ 8(R10)(R12*1), R13
1683  	XORQ (SI)(R12*1), R11
1684  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
1685  	XORQ 8(SI)(R12*1), R13
1686  	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm4MB
1687  	LEAL -16(R9), R9
1688  	LEAL 16(R12), R12
1689  	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB
1690  
1691  matchlen_bsf_16repeat_extend_encodeBlockAsm4MB:
1692  #ifdef GOAMD64_v3
1693  	TZCNTQ R13, R13
1694  
1695  #else
1696  	BSFQ R13, R13
1697  
1698  #endif
1699  	SARQ $0x03, R13
1700  	LEAL 8(R12)(R13*1), R12
1701  	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
1702  
1703  matchlen_match8_repeat_extend_encodeBlockAsm4MB:
1704  	CMPL R9, $0x08
1705  	JB   matchlen_match4_repeat_extend_encodeBlockAsm4MB
1706  	MOVQ (R10)(R12*1), R11
1707  	XORQ (SI)(R12*1), R11
1708  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
1709  	LEAL -8(R9), R9
1710  	LEAL 8(R12), R12
1711  	JMP  matchlen_match4_repeat_extend_encodeBlockAsm4MB
1712  
1713  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB:
1714  #ifdef GOAMD64_v3
1715  	TZCNTQ R11, R11
1716  
1717  #else
1718  	BSFQ R11, R11
1719  
1720  #endif
1721  	SARQ $0x03, R11
1722  	LEAL (R12)(R11*1), R12
1723  	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
1724  
1725  matchlen_match4_repeat_extend_encodeBlockAsm4MB:
1726  	CMPL R9, $0x04
1727  	JB   matchlen_match2_repeat_extend_encodeBlockAsm4MB
1728  	MOVL (R10)(R12*1), R11
1729  	CMPL (SI)(R12*1), R11
1730  	JNE  matchlen_match2_repeat_extend_encodeBlockAsm4MB
1731  	LEAL -4(R9), R9
1732  	LEAL 4(R12), R12
1733  
1734  matchlen_match2_repeat_extend_encodeBlockAsm4MB:
1735  	CMPL R9, $0x01
1736  	JE   matchlen_match1_repeat_extend_encodeBlockAsm4MB
1737  	JB   repeat_extend_forward_end_encodeBlockAsm4MB
1738  	MOVW (R10)(R12*1), R11
1739  	CMPW (SI)(R12*1), R11
1740  	JNE  matchlen_match1_repeat_extend_encodeBlockAsm4MB
1741  	LEAL 2(R12), R12
1742  	SUBL $0x02, R9
1743  	JZ   repeat_extend_forward_end_encodeBlockAsm4MB
1744  
1745  matchlen_match1_repeat_extend_encodeBlockAsm4MB:
1746  	MOVB (R10)(R12*1), R11
1747  	CMPB (SI)(R12*1), R11
1748  	JNE  repeat_extend_forward_end_encodeBlockAsm4MB
1749  	LEAL 1(R12), R12
1750  
1751  repeat_extend_forward_end_encodeBlockAsm4MB:
1752  	ADDL  R12, DX
1753  	MOVL  DX, SI
1754  	SUBL  DI, SI
1755  	MOVL  16(SP), DI
1756  	TESTL R8, R8
1757  	JZ    repeat_as_copy_encodeBlockAsm4MB
1758  
1759  	// emitRepeat
1760  	MOVL SI, R8
1761  	LEAL -4(SI), SI
1762  	CMPL R8, $0x08
1763  	JBE  repeat_two_match_repeat_encodeBlockAsm4MB
1764  	CMPL R8, $0x0c
1765  	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
1766  	CMPL DI, $0x00000800
1767  	JB   repeat_two_offset_match_repeat_encodeBlockAsm4MB
1768  
1769  cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
1770  	CMPL SI, $0x00000104
1771  	JB   repeat_three_match_repeat_encodeBlockAsm4MB
1772  	CMPL SI, $0x00010100
1773  	JB   repeat_four_match_repeat_encodeBlockAsm4MB
1774  	LEAL -65536(SI), SI
1775  	MOVL SI, DI
1776  	MOVW $0x001d, (CX)
1777  	MOVW SI, 2(CX)
1778  	SARL $0x10, DI
1779  	MOVB DI, 4(CX)
1780  	ADDQ $0x05, CX
1781  	JMP  repeat_end_emit_encodeBlockAsm4MB
1782  
1783  repeat_four_match_repeat_encodeBlockAsm4MB:
1784  	LEAL -256(SI), SI
1785  	MOVW $0x0019, (CX)
1786  	MOVW SI, 2(CX)
1787  	ADDQ $0x04, CX
1788  	JMP  repeat_end_emit_encodeBlockAsm4MB
1789  
1790  repeat_three_match_repeat_encodeBlockAsm4MB:
1791  	LEAL -4(SI), SI
1792  	MOVW $0x0015, (CX)
1793  	MOVB SI, 2(CX)
1794  	ADDQ $0x03, CX
1795  	JMP  repeat_end_emit_encodeBlockAsm4MB
1796  
1797  repeat_two_match_repeat_encodeBlockAsm4MB:
1798  	SHLL $0x02, SI
1799  	ORL  $0x01, SI
1800  	MOVW SI, (CX)
1801  	ADDQ $0x02, CX
1802  	JMP  repeat_end_emit_encodeBlockAsm4MB
1803  
1804  repeat_two_offset_match_repeat_encodeBlockAsm4MB:
1805  	XORQ R8, R8
1806  	LEAL 1(R8)(SI*4), SI
1807  	MOVB DI, 1(CX)
1808  	SARL $0x08, DI
1809  	SHLL $0x05, DI
1810  	ORL  DI, SI
1811  	MOVB SI, (CX)
1812  	ADDQ $0x02, CX
1813  	JMP  repeat_end_emit_encodeBlockAsm4MB
1814  
1815  repeat_as_copy_encodeBlockAsm4MB:
1816  	// emitCopy
1817  	CMPL DI, $0x00010000
1818  	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
1819  	CMPL SI, $0x40
1820  	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
1821  	MOVB $0xff, (CX)
1822  	MOVL DI, 1(CX)
1823  	LEAL -64(SI), SI
1824  	ADDQ $0x05, CX
1825  	CMPL SI, $0x04
1826  	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
1827  
1828  	// emitRepeat
1829  	MOVL SI, R8
1830  	LEAL -4(SI), SI
1831  	CMPL R8, $0x08
1832  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
1833  	CMPL R8, $0x0c
1834  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
1835  	CMPL DI, $0x00000800
1836  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
1837  
1838  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
1839  	CMPL SI, $0x00000104
1840  	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
1841  	CMPL SI, $0x00010100
1842  	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
1843  	LEAL -65536(SI), SI
1844  	MOVL SI, DI
1845  	MOVW $0x001d, (CX)
1846  	MOVW SI, 2(CX)
1847  	SARL $0x10, DI
1848  	MOVB DI, 4(CX)
1849  	ADDQ $0x05, CX
1850  	JMP  repeat_end_emit_encodeBlockAsm4MB
1851  
1852  repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
1853  	LEAL -256(SI), SI
1854  	MOVW $0x0019, (CX)
1855  	MOVW SI, 2(CX)
1856  	ADDQ $0x04, CX
1857  	JMP  repeat_end_emit_encodeBlockAsm4MB
1858  
1859  repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
1860  	LEAL -4(SI), SI
1861  	MOVW $0x0015, (CX)
1862  	MOVB SI, 2(CX)
1863  	ADDQ $0x03, CX
1864  	JMP  repeat_end_emit_encodeBlockAsm4MB
1865  
1866  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
1867  	SHLL $0x02, SI
1868  	ORL  $0x01, SI
1869  	MOVW SI, (CX)
1870  	ADDQ $0x02, CX
1871  	JMP  repeat_end_emit_encodeBlockAsm4MB
1872  
1873  repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
1874  	XORQ R8, R8
1875  	LEAL 1(R8)(SI*4), SI
1876  	MOVB DI, 1(CX)
1877  	SARL $0x08, DI
1878  	SHLL $0x05, DI
1879  	ORL  DI, SI
1880  	MOVB SI, (CX)
1881  	ADDQ $0x02, CX
1882  	JMP  repeat_end_emit_encodeBlockAsm4MB
1883  
1884  four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
1885  	TESTL SI, SI
1886  	JZ    repeat_end_emit_encodeBlockAsm4MB
1887  	XORL  R8, R8
1888  	LEAL  -1(R8)(SI*4), SI
1889  	MOVB  SI, (CX)
1890  	MOVL  DI, 1(CX)
1891  	ADDQ  $0x05, CX
1892  	JMP   repeat_end_emit_encodeBlockAsm4MB
1893  
1894  two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
1895  	CMPL SI, $0x40
1896  	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
1897  	CMPL DI, $0x00000800
1898  	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm4MB
1899  	MOVL $0x00000001, R8
1900  	LEAL 16(R8), R8
1901  	MOVB DI, 1(CX)
1902  	SHRL $0x08, DI
1903  	SHLL $0x05, DI
1904  	ORL  DI, R8
1905  	MOVB R8, (CX)
1906  	ADDQ $0x02, CX
1907  	SUBL $0x08, SI
1908  
1909  	// emitRepeat
1910  	LEAL -4(SI), SI
1911  	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
1912  	MOVL SI, R8
1913  	LEAL -4(SI), SI
1914  	CMPL R8, $0x08
1915  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
1916  	CMPL R8, $0x0c
1917  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
1918  	CMPL DI, $0x00000800
1919  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
1920  
1921  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
1922  	CMPL SI, $0x00000104
1923  	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
1924  	CMPL SI, $0x00010100
1925  	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
1926  	LEAL -65536(SI), SI
1927  	MOVL SI, DI
1928  	MOVW $0x001d, (CX)
1929  	MOVW SI, 2(CX)
1930  	SARL $0x10, DI
1931  	MOVB DI, 4(CX)
1932  	ADDQ $0x05, CX
1933  	JMP  repeat_end_emit_encodeBlockAsm4MB
1934  
1935  repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
1936  	LEAL -256(SI), SI
1937  	MOVW $0x0019, (CX)
1938  	MOVW SI, 2(CX)
1939  	ADDQ $0x04, CX
1940  	JMP  repeat_end_emit_encodeBlockAsm4MB
1941  
1942  repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
1943  	LEAL -4(SI), SI
1944  	MOVW $0x0015, (CX)
1945  	MOVB SI, 2(CX)
1946  	ADDQ $0x03, CX
1947  	JMP  repeat_end_emit_encodeBlockAsm4MB
1948  
1949  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
1950  	SHLL $0x02, SI
1951  	ORL  $0x01, SI
1952  	MOVW SI, (CX)
1953  	ADDQ $0x02, CX
1954  	JMP  repeat_end_emit_encodeBlockAsm4MB
1955  
1956  repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
1957  	XORQ R8, R8
1958  	LEAL 1(R8)(SI*4), SI
1959  	MOVB DI, 1(CX)
1960  	SARL $0x08, DI
1961  	SHLL $0x05, DI
1962  	ORL  DI, SI
1963  	MOVB SI, (CX)
1964  	ADDQ $0x02, CX
1965  	JMP  repeat_end_emit_encodeBlockAsm4MB
1966  
1967  long_offset_short_repeat_as_copy_encodeBlockAsm4MB:
1968  	MOVB $0xee, (CX)
1969  	MOVW DI, 1(CX)
1970  	LEAL -60(SI), SI
1971  	ADDQ $0x03, CX
1972  
1973  	// emitRepeat
1974  	MOVL SI, R8
1975  	LEAL -4(SI), SI
1976  	CMPL R8, $0x08
1977  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
1978  	CMPL R8, $0x0c
1979  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
1980  	CMPL DI, $0x00000800
1981  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
1982  
1983  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
1984  	CMPL SI, $0x00000104
1985  	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
1986  	CMPL SI, $0x00010100
1987  	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
1988  	LEAL -65536(SI), SI
1989  	MOVL SI, DI
1990  	MOVW $0x001d, (CX)
1991  	MOVW SI, 2(CX)
1992  	SARL $0x10, DI
1993  	MOVB DI, 4(CX)
1994  	ADDQ $0x05, CX
1995  	JMP  repeat_end_emit_encodeBlockAsm4MB
1996  
1997  repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
1998  	LEAL -256(SI), SI
1999  	MOVW $0x0019, (CX)
2000  	MOVW SI, 2(CX)
2001  	ADDQ $0x04, CX
2002  	JMP  repeat_end_emit_encodeBlockAsm4MB
2003  
2004  repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
2005  	LEAL -4(SI), SI
2006  	MOVW $0x0015, (CX)
2007  	MOVB SI, 2(CX)
2008  	ADDQ $0x03, CX
2009  	JMP  repeat_end_emit_encodeBlockAsm4MB
2010  
2011  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
2012  	SHLL $0x02, SI
2013  	ORL  $0x01, SI
2014  	MOVW SI, (CX)
2015  	ADDQ $0x02, CX
2016  	JMP  repeat_end_emit_encodeBlockAsm4MB
2017  
2018  repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
2019  	XORQ R8, R8
2020  	LEAL 1(R8)(SI*4), SI
2021  	MOVB DI, 1(CX)
2022  	SARL $0x08, DI
2023  	SHLL $0x05, DI
2024  	ORL  DI, SI
2025  	MOVB SI, (CX)
2026  	ADDQ $0x02, CX
2027  	JMP  repeat_end_emit_encodeBlockAsm4MB
2028  
2029  two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
2030  	MOVL SI, R8
2031  	SHLL $0x02, R8
2032  	CMPL SI, $0x0c
2033  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
2034  	CMPL DI, $0x00000800
2035  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
2036  	LEAL -15(R8), R8
2037  	MOVB DI, 1(CX)
2038  	SHRL $0x08, DI
2039  	SHLL $0x05, DI
2040  	ORL  DI, R8
2041  	MOVB R8, (CX)
2042  	ADDQ $0x02, CX
2043  	JMP  repeat_end_emit_encodeBlockAsm4MB
2044  
2045  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
2046  	LEAL -2(R8), R8
2047  	MOVB R8, (CX)
2048  	MOVW DI, 1(CX)
2049  	ADDQ $0x03, CX
2050  
2051  repeat_end_emit_encodeBlockAsm4MB:
2052  	MOVL DX, 12(SP)
2053  	JMP  search_loop_encodeBlockAsm4MB
2054  
2055  no_repeat_found_encodeBlockAsm4MB:
2056  	CMPL (BX)(SI*1), DI
2057  	JEQ  candidate_match_encodeBlockAsm4MB
2058  	SHRQ $0x08, DI
2059  	MOVL (AX)(R10*4), SI
2060  	LEAL 2(DX), R9
2061  	CMPL (BX)(R8*1), DI
2062  	JEQ  candidate2_match_encodeBlockAsm4MB
2063  	MOVL R9, (AX)(R10*4)
2064  	SHRQ $0x08, DI
2065  	CMPL (BX)(SI*1), DI
2066  	JEQ  candidate3_match_encodeBlockAsm4MB
2067  	MOVL 20(SP), DX
2068  	JMP  search_loop_encodeBlockAsm4MB
2069  
2070  candidate3_match_encodeBlockAsm4MB:
2071  	ADDL $0x02, DX
2072  	JMP  candidate_match_encodeBlockAsm4MB
2073  
2074  candidate2_match_encodeBlockAsm4MB:
2075  	MOVL R9, (AX)(R10*4)
2076  	INCL DX
2077  	MOVL R8, SI
2078  
2079  candidate_match_encodeBlockAsm4MB:
2080  	MOVL  12(SP), DI
2081  	TESTL SI, SI
2082  	JZ    match_extend_back_end_encodeBlockAsm4MB
2083  
2084  match_extend_back_loop_encodeBlockAsm4MB:
2085  	CMPL DX, DI
2086  	JBE  match_extend_back_end_encodeBlockAsm4MB
2087  	MOVB -1(BX)(SI*1), R8
2088  	MOVB -1(BX)(DX*1), R9
2089  	CMPB R8, R9
2090  	JNE  match_extend_back_end_encodeBlockAsm4MB
2091  	LEAL -1(DX), DX
2092  	DECL SI
2093  	JZ   match_extend_back_end_encodeBlockAsm4MB
2094  	JMP  match_extend_back_loop_encodeBlockAsm4MB
2095  
2096  match_extend_back_end_encodeBlockAsm4MB:
2097  	MOVL DX, DI
2098  	SUBL 12(SP), DI
2099  	LEAQ 4(CX)(DI*1), DI
2100  	CMPQ DI, (SP)
2101  	JB   match_dst_size_check_encodeBlockAsm4MB
2102  	MOVQ $0x00000000, ret+56(FP)
2103  	RET
2104  
2105  match_dst_size_check_encodeBlockAsm4MB:
2106  	MOVL DX, DI
2107  	MOVL 12(SP), R8
2108  	CMPL R8, DI
2109  	JEQ  emit_literal_done_match_emit_encodeBlockAsm4MB
2110  	MOVL DI, R9
2111  	MOVL DI, 12(SP)
2112  	LEAQ (BX)(R8*1), DI
2113  	SUBL R8, R9
2114  	LEAL -1(R9), R8
2115  	CMPL R8, $0x3c
2116  	JB   one_byte_match_emit_encodeBlockAsm4MB
2117  	CMPL R8, $0x00000100
2118  	JB   two_bytes_match_emit_encodeBlockAsm4MB
2119  	CMPL R8, $0x00010000
2120  	JB   three_bytes_match_emit_encodeBlockAsm4MB
2121  	MOVL R8, R10
2122  	SHRL $0x10, R10
2123  	MOVB $0xf8, (CX)
2124  	MOVW R8, 1(CX)
2125  	MOVB R10, 3(CX)
2126  	ADDQ $0x04, CX
2127  	JMP  memmove_long_match_emit_encodeBlockAsm4MB
2128  
2129  three_bytes_match_emit_encodeBlockAsm4MB:
2130  	MOVB $0xf4, (CX)
2131  	MOVW R8, 1(CX)
2132  	ADDQ $0x03, CX
2133  	JMP  memmove_long_match_emit_encodeBlockAsm4MB
2134  
2135  two_bytes_match_emit_encodeBlockAsm4MB:
2136  	MOVB $0xf0, (CX)
2137  	MOVB R8, 1(CX)
2138  	ADDQ $0x02, CX
2139  	CMPL R8, $0x40
2140  	JB   memmove_match_emit_encodeBlockAsm4MB
2141  	JMP  memmove_long_match_emit_encodeBlockAsm4MB
2142  
2143  one_byte_match_emit_encodeBlockAsm4MB:
2144  	SHLB $0x02, R8
2145  	MOVB R8, (CX)
2146  	ADDQ $0x01, CX
2147  
2148  memmove_match_emit_encodeBlockAsm4MB:
2149  	LEAQ (CX)(R9*1), R8
2150  
2151  	// genMemMoveShort
2152  	CMPQ R9, $0x08
2153  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
2154  	CMPQ R9, $0x10
2155  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
2156  	CMPQ R9, $0x20
2157  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
2158  	JMP  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
2159  
2160  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
2161  	MOVQ (DI), R10
2162  	MOVQ R10, (CX)
2163  	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
2164  
2165  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
2166  	MOVQ (DI), R10
2167  	MOVQ -8(DI)(R9*1), DI
2168  	MOVQ R10, (CX)
2169  	MOVQ DI, -8(CX)(R9*1)
2170  	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
2171  
2172  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
2173  	MOVOU (DI), X0
2174  	MOVOU -16(DI)(R9*1), X1
2175  	MOVOU X0, (CX)
2176  	MOVOU X1, -16(CX)(R9*1)
2177  	JMP   memmove_end_copy_match_emit_encodeBlockAsm4MB
2178  
2179  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
2180  	MOVOU (DI), X0
2181  	MOVOU 16(DI), X1
2182  	MOVOU -32(DI)(R9*1), X2
2183  	MOVOU -16(DI)(R9*1), X3
2184  	MOVOU X0, (CX)
2185  	MOVOU X1, 16(CX)
2186  	MOVOU X2, -32(CX)(R9*1)
2187  	MOVOU X3, -16(CX)(R9*1)
2188  
2189  memmove_end_copy_match_emit_encodeBlockAsm4MB:
2190  	MOVQ R8, CX
2191  	JMP  emit_literal_done_match_emit_encodeBlockAsm4MB
2192  
2193  memmove_long_match_emit_encodeBlockAsm4MB:
2194  	LEAQ (CX)(R9*1), R8
2195  
2196  	// genMemMoveLong
2197  	MOVOU (DI), X0
2198  	MOVOU 16(DI), X1
2199  	MOVOU -32(DI)(R9*1), X2
2200  	MOVOU -16(DI)(R9*1), X3
2201  	MOVQ  R9, R11
2202  	SHRQ  $0x05, R11
2203  	MOVQ  CX, R10
2204  	ANDL  $0x0000001f, R10
2205  	MOVQ  $0x00000040, R12
2206  	SUBQ  R10, R12
2207  	DECQ  R11
2208  	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
2209  	LEAQ  -32(DI)(R12*1), R10
2210  	LEAQ  -32(CX)(R12*1), R13
2211  
2212  emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
2213  	MOVOU (R10), X4
2214  	MOVOU 16(R10), X5
2215  	MOVOA X4, (R13)
2216  	MOVOA X5, 16(R13)
2217  	ADDQ  $0x20, R13
2218  	ADDQ  $0x20, R10
2219  	ADDQ  $0x20, R12
2220  	DECQ  R11
2221  	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
2222  
2223  emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
2224  	MOVOU -32(DI)(R12*1), X4
2225  	MOVOU -16(DI)(R12*1), X5
2226  	MOVOA X4, -32(CX)(R12*1)
2227  	MOVOA X5, -16(CX)(R12*1)
2228  	ADDQ  $0x20, R12
2229  	CMPQ  R9, R12
2230  	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
2231  	MOVOU X0, (CX)
2232  	MOVOU X1, 16(CX)
2233  	MOVOU X2, -32(CX)(R9*1)
2234  	MOVOU X3, -16(CX)(R9*1)
2235  	MOVQ  R8, CX
2236  
2237  emit_literal_done_match_emit_encodeBlockAsm4MB:
2238  match_nolit_loop_encodeBlockAsm4MB:
2239  	MOVL DX, DI
2240  	SUBL SI, DI
2241  	MOVL DI, 16(SP)
2242  	ADDL $0x04, DX
2243  	ADDL $0x04, SI
2244  	MOVQ src_len+32(FP), DI
2245  	SUBL DX, DI
2246  	LEAQ (BX)(DX*1), R8
2247  	LEAQ (BX)(SI*1), SI
2248  
2249  	// matchLen
2250  	XORL R10, R10
2251  
2252  matchlen_loopback_16_match_nolit_encodeBlockAsm4MB:
2253  	CMPL DI, $0x10
2254  	JB   matchlen_match8_match_nolit_encodeBlockAsm4MB
2255  	MOVQ (R8)(R10*1), R9
2256  	MOVQ 8(R8)(R10*1), R11
2257  	XORQ (SI)(R10*1), R9
2258  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
2259  	XORQ 8(SI)(R10*1), R11
2260  	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm4MB
2261  	LEAL -16(DI), DI
2262  	LEAL 16(R10), R10
2263  	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm4MB
2264  
2265  matchlen_bsf_16match_nolit_encodeBlockAsm4MB:
2266  #ifdef GOAMD64_v3
2267  	TZCNTQ R11, R11
2268  
2269  #else
2270  	BSFQ R11, R11
2271  
2272  #endif
2273  	SARQ $0x03, R11
2274  	LEAL 8(R10)(R11*1), R10
2275  	JMP  match_nolit_end_encodeBlockAsm4MB
2276  
2277  matchlen_match8_match_nolit_encodeBlockAsm4MB:
2278  	CMPL DI, $0x08
2279  	JB   matchlen_match4_match_nolit_encodeBlockAsm4MB
2280  	MOVQ (R8)(R10*1), R9
2281  	XORQ (SI)(R10*1), R9
2282  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
2283  	LEAL -8(DI), DI
2284  	LEAL 8(R10), R10
2285  	JMP  matchlen_match4_match_nolit_encodeBlockAsm4MB
2286  
2287  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB:
2288  #ifdef GOAMD64_v3
2289  	TZCNTQ R9, R9
2290  
2291  #else
2292  	BSFQ R9, R9
2293  
2294  #endif
2295  	SARQ $0x03, R9
2296  	LEAL (R10)(R9*1), R10
2297  	JMP  match_nolit_end_encodeBlockAsm4MB
2298  
2299  matchlen_match4_match_nolit_encodeBlockAsm4MB:
2300  	CMPL DI, $0x04
2301  	JB   matchlen_match2_match_nolit_encodeBlockAsm4MB
2302  	MOVL (R8)(R10*1), R9
2303  	CMPL (SI)(R10*1), R9
2304  	JNE  matchlen_match2_match_nolit_encodeBlockAsm4MB
2305  	LEAL -4(DI), DI
2306  	LEAL 4(R10), R10
2307  
2308  matchlen_match2_match_nolit_encodeBlockAsm4MB:
2309  	CMPL DI, $0x01
2310  	JE   matchlen_match1_match_nolit_encodeBlockAsm4MB
2311  	JB   match_nolit_end_encodeBlockAsm4MB
2312  	MOVW (R8)(R10*1), R9
2313  	CMPW (SI)(R10*1), R9
2314  	JNE  matchlen_match1_match_nolit_encodeBlockAsm4MB
2315  	LEAL 2(R10), R10
2316  	SUBL $0x02, DI
2317  	JZ   match_nolit_end_encodeBlockAsm4MB
2318  
2319  matchlen_match1_match_nolit_encodeBlockAsm4MB:
2320  	MOVB (R8)(R10*1), R9
2321  	CMPB (SI)(R10*1), R9
2322  	JNE  match_nolit_end_encodeBlockAsm4MB
2323  	LEAL 1(R10), R10
2324  
2325  match_nolit_end_encodeBlockAsm4MB:
2326  	ADDL R10, DX
2327  	MOVL 16(SP), SI
2328  	ADDL $0x04, R10
2329  	MOVL DX, 12(SP)
2330  
2331  	// emitCopy
2332  	CMPL SI, $0x00010000
2333  	JB   two_byte_offset_match_nolit_encodeBlockAsm4MB
2334  	CMPL R10, $0x40
2335  	JBE  four_bytes_remain_match_nolit_encodeBlockAsm4MB
2336  	MOVB $0xff, (CX)
2337  	MOVL SI, 1(CX)
2338  	LEAL -64(R10), R10
2339  	ADDQ $0x05, CX
2340  	CMPL R10, $0x04
2341  	JB   four_bytes_remain_match_nolit_encodeBlockAsm4MB
2342  
2343  	// emitRepeat
2344  	MOVL R10, DI
2345  	LEAL -4(R10), R10
2346  	CMPL DI, $0x08
2347  	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
2348  	CMPL DI, $0x0c
2349  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
2350  	CMPL SI, $0x00000800
2351  	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
2352  
2353  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
2354  	CMPL R10, $0x00000104
2355  	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
2356  	CMPL R10, $0x00010100
2357  	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
2358  	LEAL -65536(R10), R10
2359  	MOVL R10, SI
2360  	MOVW $0x001d, (CX)
2361  	MOVW R10, 2(CX)
2362  	SARL $0x10, SI
2363  	MOVB SI, 4(CX)
2364  	ADDQ $0x05, CX
2365  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2366  
2367  repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
2368  	LEAL -256(R10), R10
2369  	MOVW $0x0019, (CX)
2370  	MOVW R10, 2(CX)
2371  	ADDQ $0x04, CX
2372  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2373  
2374  repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
2375  	LEAL -4(R10), R10
2376  	MOVW $0x0015, (CX)
2377  	MOVB R10, 2(CX)
2378  	ADDQ $0x03, CX
2379  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2380  
2381  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
2382  	SHLL $0x02, R10
2383  	ORL  $0x01, R10
2384  	MOVW R10, (CX)
2385  	ADDQ $0x02, CX
2386  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2387  
2388  repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
2389  	XORQ DI, DI
2390  	LEAL 1(DI)(R10*4), R10
2391  	MOVB SI, 1(CX)
2392  	SARL $0x08, SI
2393  	SHLL $0x05, SI
2394  	ORL  SI, R10
2395  	MOVB R10, (CX)
2396  	ADDQ $0x02, CX
2397  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2398  
2399  four_bytes_remain_match_nolit_encodeBlockAsm4MB:
2400  	TESTL R10, R10
2401  	JZ    match_nolit_emitcopy_end_encodeBlockAsm4MB
2402  	XORL  DI, DI
2403  	LEAL  -1(DI)(R10*4), R10
2404  	MOVB  R10, (CX)
2405  	MOVL  SI, 1(CX)
2406  	ADDQ  $0x05, CX
2407  	JMP   match_nolit_emitcopy_end_encodeBlockAsm4MB
2408  
2409  two_byte_offset_match_nolit_encodeBlockAsm4MB:
2410  	CMPL R10, $0x40
2411  	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm4MB
2412  	CMPL SI, $0x00000800
2413  	JAE  long_offset_short_match_nolit_encodeBlockAsm4MB
2414  	MOVL $0x00000001, DI
2415  	LEAL 16(DI), DI
2416  	MOVB SI, 1(CX)
2417  	SHRL $0x08, SI
2418  	SHLL $0x05, SI
2419  	ORL  SI, DI
2420  	MOVB DI, (CX)
2421  	ADDQ $0x02, CX
2422  	SUBL $0x08, R10
2423  
2424  	// emitRepeat
2425  	LEAL -4(R10), R10
2426  	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
2427  	MOVL R10, DI
2428  	LEAL -4(R10), R10
2429  	CMPL DI, $0x08
2430  	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
2431  	CMPL DI, $0x0c
2432  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
2433  	CMPL SI, $0x00000800
2434  	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
2435  
2436  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
2437  	CMPL R10, $0x00000104
2438  	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
2439  	CMPL R10, $0x00010100
2440  	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
2441  	LEAL -65536(R10), R10
2442  	MOVL R10, SI
2443  	MOVW $0x001d, (CX)
2444  	MOVW R10, 2(CX)
2445  	SARL $0x10, SI
2446  	MOVB SI, 4(CX)
2447  	ADDQ $0x05, CX
2448  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2449  
2450  repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
2451  	LEAL -256(R10), R10
2452  	MOVW $0x0019, (CX)
2453  	MOVW R10, 2(CX)
2454  	ADDQ $0x04, CX
2455  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2456  
2457  repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
2458  	LEAL -4(R10), R10
2459  	MOVW $0x0015, (CX)
2460  	MOVB R10, 2(CX)
2461  	ADDQ $0x03, CX
2462  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2463  
2464  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
2465  	SHLL $0x02, R10
2466  	ORL  $0x01, R10
2467  	MOVW R10, (CX)
2468  	ADDQ $0x02, CX
2469  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2470  
2471  repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
2472  	XORQ DI, DI
2473  	LEAL 1(DI)(R10*4), R10
2474  	MOVB SI, 1(CX)
2475  	SARL $0x08, SI
2476  	SHLL $0x05, SI
2477  	ORL  SI, R10
2478  	MOVB R10, (CX)
2479  	ADDQ $0x02, CX
2480  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2481  
2482  long_offset_short_match_nolit_encodeBlockAsm4MB:
2483  	MOVB $0xee, (CX)
2484  	MOVW SI, 1(CX)
2485  	LEAL -60(R10), R10
2486  	ADDQ $0x03, CX
2487  
2488  	// emitRepeat
2489  	MOVL R10, DI
2490  	LEAL -4(R10), R10
2491  	CMPL DI, $0x08
2492  	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
2493  	CMPL DI, $0x0c
2494  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
2495  	CMPL SI, $0x00000800
2496  	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
2497  
2498  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
2499  	CMPL R10, $0x00000104
2500  	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
2501  	CMPL R10, $0x00010100
2502  	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
2503  	LEAL -65536(R10), R10
2504  	MOVL R10, SI
2505  	MOVW $0x001d, (CX)
2506  	MOVW R10, 2(CX)
2507  	SARL $0x10, SI
2508  	MOVB SI, 4(CX)
2509  	ADDQ $0x05, CX
2510  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2511  
2512  repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
2513  	LEAL -256(R10), R10
2514  	MOVW $0x0019, (CX)
2515  	MOVW R10, 2(CX)
2516  	ADDQ $0x04, CX
2517  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2518  
2519  repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
2520  	LEAL -4(R10), R10
2521  	MOVW $0x0015, (CX)
2522  	MOVB R10, 2(CX)
2523  	ADDQ $0x03, CX
2524  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2525  
2526  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
2527  	SHLL $0x02, R10
2528  	ORL  $0x01, R10
2529  	MOVW R10, (CX)
2530  	ADDQ $0x02, CX
2531  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2532  
2533  repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
2534  	XORQ DI, DI
2535  	LEAL 1(DI)(R10*4), R10
2536  	MOVB SI, 1(CX)
2537  	SARL $0x08, SI
2538  	SHLL $0x05, SI
2539  	ORL  SI, R10
2540  	MOVB R10, (CX)
2541  	ADDQ $0x02, CX
2542  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2543  
2544  two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
2545  	MOVL R10, DI
2546  	SHLL $0x02, DI
2547  	CMPL R10, $0x0c
2548  	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
2549  	CMPL SI, $0x00000800
2550  	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
2551  	LEAL -15(DI), DI
2552  	MOVB SI, 1(CX)
2553  	SHRL $0x08, SI
2554  	SHLL $0x05, SI
2555  	ORL  SI, DI
2556  	MOVB DI, (CX)
2557  	ADDQ $0x02, CX
2558  	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
2559  
2560  emit_copy_three_match_nolit_encodeBlockAsm4MB:
2561  	LEAL -2(DI), DI
2562  	MOVB DI, (CX)
2563  	MOVW SI, 1(CX)
2564  	ADDQ $0x03, CX
2565  
2566  match_nolit_emitcopy_end_encodeBlockAsm4MB:
2567  	CMPL DX, 8(SP)
2568  	JAE  emit_remainder_encodeBlockAsm4MB
2569  	MOVQ -2(BX)(DX*1), DI
2570  	CMPQ CX, (SP)
2571  	JB   match_nolit_dst_ok_encodeBlockAsm4MB
2572  	MOVQ $0x00000000, ret+56(FP)
2573  	RET
2574  
2575  match_nolit_dst_ok_encodeBlockAsm4MB:
2576  	MOVQ  $0x0000cf1bbcdcbf9b, R9
2577  	MOVQ  DI, R8
2578  	SHRQ  $0x10, DI
2579  	MOVQ  DI, SI
2580  	SHLQ  $0x10, R8
2581  	IMULQ R9, R8
2582  	SHRQ  $0x32, R8
2583  	SHLQ  $0x10, SI
2584  	IMULQ R9, SI
2585  	SHRQ  $0x32, SI
2586  	LEAL  -2(DX), R9
2587  	LEAQ  (AX)(SI*4), R10
2588  	MOVL  (R10), SI
2589  	MOVL  R9, (AX)(R8*4)
2590  	MOVL  DX, (R10)
2591  	CMPL  (BX)(SI*1), DI
2592  	JEQ   match_nolit_loop_encodeBlockAsm4MB
2593  	INCL  DX
2594  	JMP   search_loop_encodeBlockAsm4MB
2595  
2596  emit_remainder_encodeBlockAsm4MB:
2597  	MOVQ src_len+32(FP), AX
2598  	SUBL 12(SP), AX
2599  	LEAQ 4(CX)(AX*1), AX
2600  	CMPQ AX, (SP)
2601  	JB   emit_remainder_ok_encodeBlockAsm4MB
2602  	MOVQ $0x00000000, ret+56(FP)
2603  	RET
2604  
2605  emit_remainder_ok_encodeBlockAsm4MB:
2606  	MOVQ src_len+32(FP), AX
2607  	MOVL 12(SP), DX
2608  	CMPL DX, AX
2609  	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm4MB
2610  	MOVL AX, SI
2611  	MOVL AX, 12(SP)
2612  	LEAQ (BX)(DX*1), AX
2613  	SUBL DX, SI
2614  	LEAL -1(SI), DX
2615  	CMPL DX, $0x3c
2616  	JB   one_byte_emit_remainder_encodeBlockAsm4MB
2617  	CMPL DX, $0x00000100
2618  	JB   two_bytes_emit_remainder_encodeBlockAsm4MB
2619  	CMPL DX, $0x00010000
2620  	JB   three_bytes_emit_remainder_encodeBlockAsm4MB
2621  	MOVL DX, BX
2622  	SHRL $0x10, BX
2623  	MOVB $0xf8, (CX)
2624  	MOVW DX, 1(CX)
2625  	MOVB BL, 3(CX)
2626  	ADDQ $0x04, CX
2627  	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
2628  
2629  three_bytes_emit_remainder_encodeBlockAsm4MB:
2630  	MOVB $0xf4, (CX)
2631  	MOVW DX, 1(CX)
2632  	ADDQ $0x03, CX
2633  	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
2634  
2635  two_bytes_emit_remainder_encodeBlockAsm4MB:
2636  	MOVB $0xf0, (CX)
2637  	MOVB DL, 1(CX)
2638  	ADDQ $0x02, CX
2639  	CMPL DX, $0x40
2640  	JB   memmove_emit_remainder_encodeBlockAsm4MB
2641  	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
2642  
2643  one_byte_emit_remainder_encodeBlockAsm4MB:
2644  	SHLB $0x02, DL
2645  	MOVB DL, (CX)
2646  	ADDQ $0x01, CX
2647  
2648  memmove_emit_remainder_encodeBlockAsm4MB:
2649  	LEAQ (CX)(SI*1), DX
2650  	MOVL SI, BX
2651  
2652  	// genMemMoveShort
2653  	CMPQ BX, $0x03
2654  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2
2655  	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3
2656  	CMPQ BX, $0x08
2657  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7
2658  	CMPQ BX, $0x10
2659  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
2660  	CMPQ BX, $0x20
2661  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
2662  	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
2663  
2664  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
2665  	MOVB (AX), SI
2666  	MOVB -1(AX)(BX*1), AL
2667  	MOVB SI, (CX)
2668  	MOVB AL, -1(CX)(BX*1)
2669  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
2670  
2671  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
2672  	MOVW (AX), SI
2673  	MOVB 2(AX), AL
2674  	MOVW SI, (CX)
2675  	MOVB AL, 2(CX)
2676  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
2677  
2678  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
2679  	MOVL (AX), SI
2680  	MOVL -4(AX)(BX*1), AX
2681  	MOVL SI, (CX)
2682  	MOVL AX, -4(CX)(BX*1)
2683  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
2684  
2685  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
2686  	MOVQ (AX), SI
2687  	MOVQ -8(AX)(BX*1), AX
2688  	MOVQ SI, (CX)
2689  	MOVQ AX, -8(CX)(BX*1)
2690  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
2691  
2692  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
2693  	MOVOU (AX), X0
2694  	MOVOU -16(AX)(BX*1), X1
2695  	MOVOU X0, (CX)
2696  	MOVOU X1, -16(CX)(BX*1)
2697  	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm4MB
2698  
2699  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
2700  	MOVOU (AX), X0
2701  	MOVOU 16(AX), X1
2702  	MOVOU -32(AX)(BX*1), X2
2703  	MOVOU -16(AX)(BX*1), X3
2704  	MOVOU X0, (CX)
2705  	MOVOU X1, 16(CX)
2706  	MOVOU X2, -32(CX)(BX*1)
2707  	MOVOU X3, -16(CX)(BX*1)
2708  
2709  memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
2710  	MOVQ DX, CX
2711  	JMP  emit_literal_done_emit_remainder_encodeBlockAsm4MB
2712  
2713  memmove_long_emit_remainder_encodeBlockAsm4MB:
2714  	LEAQ (CX)(SI*1), DX
2715  	MOVL SI, BX
2716  
2717  	// genMemMoveLong
2718  	MOVOU (AX), X0
2719  	MOVOU 16(AX), X1
2720  	MOVOU -32(AX)(BX*1), X2
2721  	MOVOU -16(AX)(BX*1), X3
2722  	MOVQ  BX, DI
2723  	SHRQ  $0x05, DI
2724  	MOVQ  CX, SI
2725  	ANDL  $0x0000001f, SI
2726  	MOVQ  $0x00000040, R8
2727  	SUBQ  SI, R8
2728  	DECQ  DI
2729  	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
2730  	LEAQ  -32(AX)(R8*1), SI
2731  	LEAQ  -32(CX)(R8*1), R9
2732  
2733  emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
2734  	MOVOU (SI), X4
2735  	MOVOU 16(SI), X5
2736  	MOVOA X4, (R9)
2737  	MOVOA X5, 16(R9)
2738  	ADDQ  $0x20, R9
2739  	ADDQ  $0x20, SI
2740  	ADDQ  $0x20, R8
2741  	DECQ  DI
2742  	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
2743  
2744  emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
2745  	MOVOU -32(AX)(R8*1), X4
2746  	MOVOU -16(AX)(R8*1), X5
2747  	MOVOA X4, -32(CX)(R8*1)
2748  	MOVOA X5, -16(CX)(R8*1)
2749  	ADDQ  $0x20, R8
2750  	CMPQ  BX, R8
2751  	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
2752  	MOVOU X0, (CX)
2753  	MOVOU X1, 16(CX)
2754  	MOVOU X2, -32(CX)(BX*1)
2755  	MOVOU X3, -16(CX)(BX*1)
2756  	MOVQ  DX, CX
2757  
2758  emit_literal_done_emit_remainder_encodeBlockAsm4MB:
2759  	MOVQ dst_base+0(FP), AX
2760  	SUBQ AX, CX
2761  	MOVQ CX, ret+56(FP)
2762  	RET
2763  
2764  // func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
2765  // Requires: BMI, SSE2
2766  TEXT ·encodeBlockAsm12B(SB), $24-64
2767  	MOVQ tmp+48(FP), AX
2768  	MOVQ dst_base+0(FP), CX
2769  	MOVQ $0x00000080, DX
2770  	MOVQ AX, BX
2771  	PXOR X0, X0
2772  
2773  zero_loop_encodeBlockAsm12B:
2774  	MOVOU X0, (BX)
2775  	MOVOU X0, 16(BX)
2776  	MOVOU X0, 32(BX)
2777  	MOVOU X0, 48(BX)
2778  	MOVOU X0, 64(BX)
2779  	MOVOU X0, 80(BX)
2780  	MOVOU X0, 96(BX)
2781  	MOVOU X0, 112(BX)
2782  	ADDQ  $0x80, BX
2783  	DECQ  DX
2784  	JNZ   zero_loop_encodeBlockAsm12B
2785  	MOVL  $0x00000000, 12(SP)
2786  	MOVQ  src_len+32(FP), DX
2787  	LEAQ  -9(DX), BX
2788  	LEAQ  -8(DX), SI
2789  	MOVL  SI, 8(SP)
2790  	SHRQ  $0x05, DX
2791  	SUBL  DX, BX
2792  	LEAQ  (CX)(BX*1), BX
2793  	MOVQ  BX, (SP)
2794  	MOVL  $0x00000001, DX
2795  	MOVL  DX, 16(SP)
2796  	MOVQ  src_base+24(FP), BX
2797  
2798  search_loop_encodeBlockAsm12B:
2799  	MOVL  DX, SI
2800  	SUBL  12(SP), SI
2801  	SHRL  $0x05, SI
2802  	LEAL  4(DX)(SI*1), SI
2803  	CMPL  SI, 8(SP)
2804  	JAE   emit_remainder_encodeBlockAsm12B
2805  	MOVQ  (BX)(DX*1), DI
2806  	MOVL  SI, 20(SP)
2807  	MOVQ  $0x000000cf1bbcdcbb, R9
2808  	MOVQ  DI, R10
2809  	MOVQ  DI, R11
2810  	SHRQ  $0x08, R11
2811  	SHLQ  $0x18, R10
2812  	IMULQ R9, R10
2813  	SHRQ  $0x34, R10
2814  	SHLQ  $0x18, R11
2815  	IMULQ R9, R11
2816  	SHRQ  $0x34, R11
2817  	MOVL  (AX)(R10*4), SI
2818  	MOVL  (AX)(R11*4), R8
2819  	MOVL  DX, (AX)(R10*4)
2820  	LEAL  1(DX), R10
2821  	MOVL  R10, (AX)(R11*4)
2822  	MOVQ  DI, R10
2823  	SHRQ  $0x10, R10
2824  	SHLQ  $0x18, R10
2825  	IMULQ R9, R10
2826  	SHRQ  $0x34, R10
2827  	MOVL  DX, R9
2828  	SUBL  16(SP), R9
2829  	MOVL  1(BX)(R9*1), R11
2830  	MOVQ  DI, R9
2831  	SHRQ  $0x08, R9
2832  	CMPL  R9, R11
2833  	JNE   no_repeat_found_encodeBlockAsm12B
2834  	LEAL  1(DX), DI
2835  	MOVL  12(SP), R8
2836  	MOVL  DI, SI
2837  	SUBL  16(SP), SI
2838  	JZ    repeat_extend_back_end_encodeBlockAsm12B
2839  
2840  repeat_extend_back_loop_encodeBlockAsm12B:
2841  	CMPL DI, R8
2842  	JBE  repeat_extend_back_end_encodeBlockAsm12B
2843  	MOVB -1(BX)(SI*1), R9
2844  	MOVB -1(BX)(DI*1), R10
2845  	CMPB R9, R10
2846  	JNE  repeat_extend_back_end_encodeBlockAsm12B
2847  	LEAL -1(DI), DI
2848  	DECL SI
2849  	JNZ  repeat_extend_back_loop_encodeBlockAsm12B
2850  
2851  repeat_extend_back_end_encodeBlockAsm12B:
2852  	MOVL DI, SI
2853  	SUBL 12(SP), SI
2854  	LEAQ 3(CX)(SI*1), SI
2855  	CMPQ SI, (SP)
2856  	JB   repeat_dst_size_check_encodeBlockAsm12B
2857  	MOVQ $0x00000000, ret+56(FP)
2858  	RET
2859  
2860  repeat_dst_size_check_encodeBlockAsm12B:
2861  	MOVL 12(SP), SI
2862  	CMPL SI, DI
2863  	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm12B
2864  	MOVL DI, R9
2865  	MOVL DI, 12(SP)
2866  	LEAQ (BX)(SI*1), R10
2867  	SUBL SI, R9
2868  	LEAL -1(R9), SI
2869  	CMPL SI, $0x3c
2870  	JB   one_byte_repeat_emit_encodeBlockAsm12B
2871  	CMPL SI, $0x00000100
2872  	JB   two_bytes_repeat_emit_encodeBlockAsm12B
2873  	JB   three_bytes_repeat_emit_encodeBlockAsm12B
2874  
2875  three_bytes_repeat_emit_encodeBlockAsm12B:
2876  	MOVB $0xf4, (CX)
2877  	MOVW SI, 1(CX)
2878  	ADDQ $0x03, CX
2879  	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
2880  
2881  two_bytes_repeat_emit_encodeBlockAsm12B:
2882  	MOVB $0xf0, (CX)
2883  	MOVB SI, 1(CX)
2884  	ADDQ $0x02, CX
2885  	CMPL SI, $0x40
2886  	JB   memmove_repeat_emit_encodeBlockAsm12B
2887  	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
2888  
2889  one_byte_repeat_emit_encodeBlockAsm12B:
2890  	SHLB $0x02, SI
2891  	MOVB SI, (CX)
2892  	ADDQ $0x01, CX
2893  
2894  memmove_repeat_emit_encodeBlockAsm12B:
2895  	LEAQ (CX)(R9*1), SI
2896  
2897  	// genMemMoveShort
2898  	CMPQ R9, $0x08
2899  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
2900  	CMPQ R9, $0x10
2901  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
2902  	CMPQ R9, $0x20
2903  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
2904  	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
2905  
2906  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
2907  	MOVQ (R10), R11
2908  	MOVQ R11, (CX)
2909  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
2910  
2911  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
2912  	MOVQ (R10), R11
2913  	MOVQ -8(R10)(R9*1), R10
2914  	MOVQ R11, (CX)
2915  	MOVQ R10, -8(CX)(R9*1)
2916  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
2917  
2918  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
2919  	MOVOU (R10), X0
2920  	MOVOU -16(R10)(R9*1), X1
2921  	MOVOU X0, (CX)
2922  	MOVOU X1, -16(CX)(R9*1)
2923  	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm12B
2924  
2925  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
2926  	MOVOU (R10), X0
2927  	MOVOU 16(R10), X1
2928  	MOVOU -32(R10)(R9*1), X2
2929  	MOVOU -16(R10)(R9*1), X3
2930  	MOVOU X0, (CX)
2931  	MOVOU X1, 16(CX)
2932  	MOVOU X2, -32(CX)(R9*1)
2933  	MOVOU X3, -16(CX)(R9*1)
2934  
2935  memmove_end_copy_repeat_emit_encodeBlockAsm12B:
2936  	MOVQ SI, CX
2937  	JMP  emit_literal_done_repeat_emit_encodeBlockAsm12B
2938  
2939  memmove_long_repeat_emit_encodeBlockAsm12B:
2940  	LEAQ (CX)(R9*1), SI
2941  
2942  	// genMemMoveLong
2943  	MOVOU (R10), X0
2944  	MOVOU 16(R10), X1
2945  	MOVOU -32(R10)(R9*1), X2
2946  	MOVOU -16(R10)(R9*1), X3
2947  	MOVQ  R9, R12
2948  	SHRQ  $0x05, R12
2949  	MOVQ  CX, R11
2950  	ANDL  $0x0000001f, R11
2951  	MOVQ  $0x00000040, R13
2952  	SUBQ  R11, R13
2953  	DECQ  R12
2954  	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
2955  	LEAQ  -32(R10)(R13*1), R11
2956  	LEAQ  -32(CX)(R13*1), R14
2957  
2958  emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
2959  	MOVOU (R11), X4
2960  	MOVOU 16(R11), X5
2961  	MOVOA X4, (R14)
2962  	MOVOA X5, 16(R14)
2963  	ADDQ  $0x20, R14
2964  	ADDQ  $0x20, R11
2965  	ADDQ  $0x20, R13
2966  	DECQ  R12
2967  	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
2968  
2969  emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
2970  	MOVOU -32(R10)(R13*1), X4
2971  	MOVOU -16(R10)(R13*1), X5
2972  	MOVOA X4, -32(CX)(R13*1)
2973  	MOVOA X5, -16(CX)(R13*1)
2974  	ADDQ  $0x20, R13
2975  	CMPQ  R9, R13
2976  	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
2977  	MOVOU X0, (CX)
2978  	MOVOU X1, 16(CX)
2979  	MOVOU X2, -32(CX)(R9*1)
2980  	MOVOU X3, -16(CX)(R9*1)
2981  	MOVQ  SI, CX
2982  
2983  emit_literal_done_repeat_emit_encodeBlockAsm12B:
2984  	ADDL $0x05, DX
2985  	MOVL DX, SI
2986  	SUBL 16(SP), SI
2987  	MOVQ src_len+32(FP), R9
2988  	SUBL DX, R9
2989  	LEAQ (BX)(DX*1), R10
2990  	LEAQ (BX)(SI*1), SI
2991  
2992  	// matchLen
2993  	XORL R12, R12
2994  
2995  matchlen_loopback_16_repeat_extend_encodeBlockAsm12B:
2996  	CMPL R9, $0x10
2997  	JB   matchlen_match8_repeat_extend_encodeBlockAsm12B
2998  	MOVQ (R10)(R12*1), R11
2999  	MOVQ 8(R10)(R12*1), R13
3000  	XORQ (SI)(R12*1), R11
3001  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
3002  	XORQ 8(SI)(R12*1), R13
3003  	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm12B
3004  	LEAL -16(R9), R9
3005  	LEAL 16(R12), R12
3006  	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm12B
3007  
3008  matchlen_bsf_16repeat_extend_encodeBlockAsm12B:
3009  #ifdef GOAMD64_v3
3010  	TZCNTQ R13, R13
3011  
3012  #else
3013  	BSFQ R13, R13
3014  
3015  #endif
3016  	SARQ $0x03, R13
3017  	LEAL 8(R12)(R13*1), R12
3018  	JMP  repeat_extend_forward_end_encodeBlockAsm12B
3019  
3020  matchlen_match8_repeat_extend_encodeBlockAsm12B:
3021  	CMPL R9, $0x08
3022  	JB   matchlen_match4_repeat_extend_encodeBlockAsm12B
3023  	MOVQ (R10)(R12*1), R11
3024  	XORQ (SI)(R12*1), R11
3025  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
3026  	LEAL -8(R9), R9
3027  	LEAL 8(R12), R12
3028  	JMP  matchlen_match4_repeat_extend_encodeBlockAsm12B
3029  
3030  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B:
3031  #ifdef GOAMD64_v3
3032  	TZCNTQ R11, R11
3033  
3034  #else
3035  	BSFQ R11, R11
3036  
3037  #endif
3038  	SARQ $0x03, R11
3039  	LEAL (R12)(R11*1), R12
3040  	JMP  repeat_extend_forward_end_encodeBlockAsm12B
3041  
3042  matchlen_match4_repeat_extend_encodeBlockAsm12B:
3043  	CMPL R9, $0x04
3044  	JB   matchlen_match2_repeat_extend_encodeBlockAsm12B
3045  	MOVL (R10)(R12*1), R11
3046  	CMPL (SI)(R12*1), R11
3047  	JNE  matchlen_match2_repeat_extend_encodeBlockAsm12B
3048  	LEAL -4(R9), R9
3049  	LEAL 4(R12), R12
3050  
3051  matchlen_match2_repeat_extend_encodeBlockAsm12B:
3052  	CMPL R9, $0x01
3053  	JE   matchlen_match1_repeat_extend_encodeBlockAsm12B
3054  	JB   repeat_extend_forward_end_encodeBlockAsm12B
3055  	MOVW (R10)(R12*1), R11
3056  	CMPW (SI)(R12*1), R11
3057  	JNE  matchlen_match1_repeat_extend_encodeBlockAsm12B
3058  	LEAL 2(R12), R12
3059  	SUBL $0x02, R9
3060  	JZ   repeat_extend_forward_end_encodeBlockAsm12B
3061  
3062  matchlen_match1_repeat_extend_encodeBlockAsm12B:
3063  	MOVB (R10)(R12*1), R11
3064  	CMPB (SI)(R12*1), R11
3065  	JNE  repeat_extend_forward_end_encodeBlockAsm12B
3066  	LEAL 1(R12), R12
3067  
3068  repeat_extend_forward_end_encodeBlockAsm12B:
3069  	ADDL  R12, DX
3070  	MOVL  DX, SI
3071  	SUBL  DI, SI
3072  	MOVL  16(SP), DI
3073  	TESTL R8, R8
3074  	JZ    repeat_as_copy_encodeBlockAsm12B
3075  
3076  	// emitRepeat
3077  	MOVL SI, R8
3078  	LEAL -4(SI), SI
3079  	CMPL R8, $0x08
3080  	JBE  repeat_two_match_repeat_encodeBlockAsm12B
3081  	CMPL R8, $0x0c
3082  	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
3083  	CMPL DI, $0x00000800
3084  	JB   repeat_two_offset_match_repeat_encodeBlockAsm12B
3085  
3086  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
3087  	CMPL SI, $0x00000104
3088  	JB   repeat_three_match_repeat_encodeBlockAsm12B
3089  	LEAL -256(SI), SI
3090  	MOVW $0x0019, (CX)
3091  	MOVW SI, 2(CX)
3092  	ADDQ $0x04, CX
3093  	JMP  repeat_end_emit_encodeBlockAsm12B
3094  
3095  repeat_three_match_repeat_encodeBlockAsm12B:
3096  	LEAL -4(SI), SI
3097  	MOVW $0x0015, (CX)
3098  	MOVB SI, 2(CX)
3099  	ADDQ $0x03, CX
3100  	JMP  repeat_end_emit_encodeBlockAsm12B
3101  
3102  repeat_two_match_repeat_encodeBlockAsm12B:
3103  	SHLL $0x02, SI
3104  	ORL  $0x01, SI
3105  	MOVW SI, (CX)
3106  	ADDQ $0x02, CX
3107  	JMP  repeat_end_emit_encodeBlockAsm12B
3108  
3109  repeat_two_offset_match_repeat_encodeBlockAsm12B:
3110  	XORQ R8, R8
3111  	LEAL 1(R8)(SI*4), SI
3112  	MOVB DI, 1(CX)
3113  	SARL $0x08, DI
3114  	SHLL $0x05, DI
3115  	ORL  DI, SI
3116  	MOVB SI, (CX)
3117  	ADDQ $0x02, CX
3118  	JMP  repeat_end_emit_encodeBlockAsm12B
3119  
3120  repeat_as_copy_encodeBlockAsm12B:
3121  	// emitCopy
3122  	CMPL SI, $0x40
3123  	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
3124  	CMPL DI, $0x00000800
3125  	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm12B
3126  	MOVL $0x00000001, R8
3127  	LEAL 16(R8), R8
3128  	MOVB DI, 1(CX)
3129  	SHRL $0x08, DI
3130  	SHLL $0x05, DI
3131  	ORL  DI, R8
3132  	MOVB R8, (CX)
3133  	ADDQ $0x02, CX
3134  	SUBL $0x08, SI
3135  
3136  	// emitRepeat
3137  	LEAL -4(SI), SI
3138  	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
3139  	MOVL SI, R8
3140  	LEAL -4(SI), SI
3141  	CMPL R8, $0x08
3142  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
3143  	CMPL R8, $0x0c
3144  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
3145  	CMPL DI, $0x00000800
3146  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
3147  
3148  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
3149  	CMPL SI, $0x00000104
3150  	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
3151  	LEAL -256(SI), SI
3152  	MOVW $0x0019, (CX)
3153  	MOVW SI, 2(CX)
3154  	ADDQ $0x04, CX
3155  	JMP  repeat_end_emit_encodeBlockAsm12B
3156  
3157  repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
3158  	LEAL -4(SI), SI
3159  	MOVW $0x0015, (CX)
3160  	MOVB SI, 2(CX)
3161  	ADDQ $0x03, CX
3162  	JMP  repeat_end_emit_encodeBlockAsm12B
3163  
3164  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
3165  	SHLL $0x02, SI
3166  	ORL  $0x01, SI
3167  	MOVW SI, (CX)
3168  	ADDQ $0x02, CX
3169  	JMP  repeat_end_emit_encodeBlockAsm12B
3170  
3171  repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
3172  	XORQ R8, R8
3173  	LEAL 1(R8)(SI*4), SI
3174  	MOVB DI, 1(CX)
3175  	SARL $0x08, DI
3176  	SHLL $0x05, DI
3177  	ORL  DI, SI
3178  	MOVB SI, (CX)
3179  	ADDQ $0x02, CX
3180  	JMP  repeat_end_emit_encodeBlockAsm12B
3181  
3182  long_offset_short_repeat_as_copy_encodeBlockAsm12B:
3183  	MOVB $0xee, (CX)
3184  	MOVW DI, 1(CX)
3185  	LEAL -60(SI), SI
3186  	ADDQ $0x03, CX
3187  
3188  	// emitRepeat
3189  	MOVL SI, R8
3190  	LEAL -4(SI), SI
3191  	CMPL R8, $0x08
3192  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
3193  	CMPL R8, $0x0c
3194  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
3195  	CMPL DI, $0x00000800
3196  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
3197  
3198  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
3199  	CMPL SI, $0x00000104
3200  	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
3201  	LEAL -256(SI), SI
3202  	MOVW $0x0019, (CX)
3203  	MOVW SI, 2(CX)
3204  	ADDQ $0x04, CX
3205  	JMP  repeat_end_emit_encodeBlockAsm12B
3206  
3207  repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
3208  	LEAL -4(SI), SI
3209  	MOVW $0x0015, (CX)
3210  	MOVB SI, 2(CX)
3211  	ADDQ $0x03, CX
3212  	JMP  repeat_end_emit_encodeBlockAsm12B
3213  
3214  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
3215  	SHLL $0x02, SI
3216  	ORL  $0x01, SI
3217  	MOVW SI, (CX)
3218  	ADDQ $0x02, CX
3219  	JMP  repeat_end_emit_encodeBlockAsm12B
3220  
3221  repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
3222  	XORQ R8, R8
3223  	LEAL 1(R8)(SI*4), SI
3224  	MOVB DI, 1(CX)
3225  	SARL $0x08, DI
3226  	SHLL $0x05, DI
3227  	ORL  DI, SI
3228  	MOVB SI, (CX)
3229  	ADDQ $0x02, CX
3230  	JMP  repeat_end_emit_encodeBlockAsm12B
3231  
3232  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
3233  	MOVL SI, R8
3234  	SHLL $0x02, R8
3235  	CMPL SI, $0x0c
3236  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
3237  	CMPL DI, $0x00000800
3238  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
3239  	LEAL -15(R8), R8
3240  	MOVB DI, 1(CX)
3241  	SHRL $0x08, DI
3242  	SHLL $0x05, DI
3243  	ORL  DI, R8
3244  	MOVB R8, (CX)
3245  	ADDQ $0x02, CX
3246  	JMP  repeat_end_emit_encodeBlockAsm12B
3247  
3248  emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
3249  	LEAL -2(R8), R8
3250  	MOVB R8, (CX)
3251  	MOVW DI, 1(CX)
3252  	ADDQ $0x03, CX
3253  
3254  repeat_end_emit_encodeBlockAsm12B: // join point for the emit paths above
3255  	MOVL DX, 12(SP) // 12(SP) = DX; 12(SP) is later read as the index where not-yet-emitted literals start
3256  	JMP  search_loop_encodeBlockAsm12B
3257  
3258  no_repeat_found_encodeBlockAsm12B: // NOTE(review): SI, R8, R10 and DI are set before this label, outside the visible chunk; presumably SI/R8 are table candidates and DI the 8 bytes at src[DX] - confirm
3259  	CMPL (BX)(SI*1), DI // compare 4 bytes at BX+SI with the low 32 bits of DI
3260  	JEQ  candidate_match_encodeBlockAsm12B
3261  	SHRQ $0x08, DI // drop the lowest byte of DI
3262  	MOVL (AX)(R10*4), SI // SI = 32-bit table entry at index R10 (table addressed through AX)
3263  	LEAL 2(DX), R9 // R9 = DX + 2
3264  	CMPL (BX)(R8*1), DI // compare 4 bytes at BX+R8 with the shifted value
3265  	JEQ  candidate2_match_encodeBlockAsm12B
3266  	MOVL R9, (AX)(R10*4) // table[R10] = DX + 2
3267  	SHRQ $0x08, DI // drop one more byte of DI
3268  	CMPL (BX)(SI*1), DI // compare 4 bytes at BX+SI (entry just loaded) with the shifted value
3269  	JEQ  candidate3_match_encodeBlockAsm12B
3270  	MOVL 20(SP), DX // no candidate matched: DX = value saved in 20(SP)
3271  	JMP  search_loop_encodeBlockAsm12B
3272  
3273  candidate3_match_encodeBlockAsm12B: // third comparison matched: advance DX by 2 to line up with the twice-shifted DI
3274  	ADDL $0x02, DX
3275  	JMP  candidate_match_encodeBlockAsm12B
3276  
3277  candidate2_match_encodeBlockAsm12B: // second comparison matched
3278  	MOVL R9, (AX)(R10*4) // table[R10] = DX + 2 (this store is skipped on the branch taken to get here)
3279  	INCL DX // advance DX by 1 to line up with the once-shifted DI
3280  	MOVL R8, SI // SI = R8, the index that matched; falls through to candidate_match
3281  
3282  candidate_match_encodeBlockAsm12B: // extend the match backwards: DX = current index, SI = candidate index, both relative to BX
3283  	MOVL  12(SP), DI // DI = lower bound for DX (value stored in 12(SP))
3284  	TESTL SI, SI
3285  	JZ    match_extend_back_end_encodeBlockAsm12B // candidate already at index 0: nothing before it to compare
3286  
3287  match_extend_back_loop_encodeBlockAsm12B:
3288  	CMPL DX, DI
3289  	JBE  match_extend_back_end_encodeBlockAsm12B // stop once DX <= 12(SP) (unsigned)
3290  	MOVB -1(BX)(SI*1), R8 // byte just before the candidate
3291  	MOVB -1(BX)(DX*1), R9 // byte just before the current position
3292  	CMPB R8, R9
3293  	JNE  match_extend_back_end_encodeBlockAsm12B // preceding bytes differ: stop
3294  	LEAL -1(DX), DX // DX-- (LEAL leaves the flags untouched)
3295  	DECL SI // SI--; sets ZF for the test below
3296  	JZ   match_extend_back_end_encodeBlockAsm12B // candidate reached index 0: stop
3297  	JMP  match_extend_back_loop_encodeBlockAsm12B
3298  
3299  match_extend_back_end_encodeBlockAsm12B: // bounds check before emitting DX - 12(SP) bytes
3300  	MOVL DX, DI
3301  	SUBL 12(SP), DI // DI = DX - 12(SP)
3302  	LEAQ 3(CX)(DI*1), DI // DI = CX + DI + 3
3303  	CMPQ DI, (SP) // compare against the pointer stored in (SP) (set outside this chunk; presumably the dst limit)
3304  	JB   match_dst_size_check_encodeBlockAsm12B // below the limit: continue
3305  	MOVQ $0x00000000, ret+56(FP) // otherwise return 0
3306  	RET
3307  
3308  match_dst_size_check_encodeBlockAsm12B: // write the header for the bytes between 12(SP) and DX, then pick a copy routine
3309  	MOVL DX, DI
3310  	MOVL 12(SP), R8 // R8 = start index of the pending bytes
3311  	CMPL R8, DI
3312  	JEQ  emit_literal_done_match_emit_encodeBlockAsm12B // nothing pending: skip emission
3313  	MOVL DI, R9
3314  	MOVL DI, 12(SP) // 12(SP) = DX
3315  	LEAQ (BX)(R8*1), DI // DI = source pointer BX + R8
3316  	SUBL R8, R9 // R9 = number of bytes to copy
3317  	LEAL -1(R9), R8 // R8 = length - 1, the value written into the header
3318  	CMPL R8, $0x3c
3319  	JB   one_byte_match_emit_encodeBlockAsm12B // length-1 < 60: single header byte
3320  	CMPL R8, $0x00000100
3321  	JB   two_bytes_match_emit_encodeBlockAsm12B // length-1 < 256: two header bytes
3322  	JB   three_bytes_match_emit_encodeBlockAsm12B // never taken: flags are unchanged since the JB above fell through; the target is the next label anyway
3323  
3324  three_bytes_match_emit_encodeBlockAsm12B:
3325  	MOVB $0xf4, (CX) // byte 0xf4, presumably the literal tag announcing a 2-byte length - confirm against the format spec
3326  	MOVW R8, 1(CX) // low 16 bits of length-1
3327  	ADDQ $0x03, CX
3328  	JMP  memmove_long_match_emit_encodeBlockAsm12B
3329  
3330  two_bytes_match_emit_encodeBlockAsm12B:
3331  	MOVB $0xf0, (CX) // byte 0xf0, presumably the literal tag announcing a 1-byte length - confirm against the format spec
3332  	MOVB R8, 1(CX) // low byte of length-1
3333  	ADDQ $0x02, CX
3334  	CMPL R8, $0x40
3335  	JB   memmove_match_emit_encodeBlockAsm12B // length-1 < 64: short copy routine
3336  	JMP  memmove_long_match_emit_encodeBlockAsm12B
3337  
3338  one_byte_match_emit_encodeBlockAsm12B:
3339  	SHLB $0x02, R8 // header byte = (length-1) << 2
3340  	MOVB R8, (CX)
3341  	ADDQ $0x01, CX // falls through to memmove_match_emit
3342  
3343  memmove_match_emit_encodeBlockAsm12B:
3344  	LEAQ (CX)(R9*1), R8
3345  
3346  	// genMemMoveShort
3347  	CMPQ R9, $0x08
3348  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
3349  	CMPQ R9, $0x10
3350  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
3351  	CMPQ R9, $0x20
3352  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
3353  	JMP  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
3354  
3355  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
3356  	MOVQ (DI), R10
3357  	MOVQ R10, (CX)
3358  	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
3359  
3360  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
3361  	MOVQ (DI), R10
3362  	MOVQ -8(DI)(R9*1), DI
3363  	MOVQ R10, (CX)
3364  	MOVQ DI, -8(CX)(R9*1)
3365  	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
3366  
3367  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
3368  	MOVOU (DI), X0
3369  	MOVOU -16(DI)(R9*1), X1
3370  	MOVOU X0, (CX)
3371  	MOVOU X1, -16(CX)(R9*1)
3372  	JMP   memmove_end_copy_match_emit_encodeBlockAsm12B
3373  
3374  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
3375  	MOVOU (DI), X0
3376  	MOVOU 16(DI), X1
3377  	MOVOU -32(DI)(R9*1), X2
3378  	MOVOU -16(DI)(R9*1), X3
3379  	MOVOU X0, (CX)
3380  	MOVOU X1, 16(CX)
3381  	MOVOU X2, -32(CX)(R9*1)
3382  	MOVOU X3, -16(CX)(R9*1)
3383  
3384  memmove_end_copy_match_emit_encodeBlockAsm12B:
3385  	MOVQ R8, CX
3386  	JMP  emit_literal_done_match_emit_encodeBlockAsm12B
3387  
3388  memmove_long_match_emit_encodeBlockAsm12B:
3389  	LEAQ (CX)(R9*1), R8
3390  
3391  	// genMemMoveLong
3392  	MOVOU (DI), X0
3393  	MOVOU 16(DI), X1
3394  	MOVOU -32(DI)(R9*1), X2
3395  	MOVOU -16(DI)(R9*1), X3
3396  	MOVQ  R9, R11
3397  	SHRQ  $0x05, R11
3398  	MOVQ  CX, R10
3399  	ANDL  $0x0000001f, R10
3400  	MOVQ  $0x00000040, R12
3401  	SUBQ  R10, R12
3402  	DECQ  R11
3403  	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
3404  	LEAQ  -32(DI)(R12*1), R10
3405  	LEAQ  -32(CX)(R12*1), R13
3406  
3407  emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
3408  	MOVOU (R10), X4
3409  	MOVOU 16(R10), X5
3410  	MOVOA X4, (R13)
3411  	MOVOA X5, 16(R13)
3412  	ADDQ  $0x20, R13
3413  	ADDQ  $0x20, R10
3414  	ADDQ  $0x20, R12
3415  	DECQ  R11
3416  	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
3417  
3418  emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
3419  	MOVOU -32(DI)(R12*1), X4
3420  	MOVOU -16(DI)(R12*1), X5
3421  	MOVOA X4, -32(CX)(R12*1)
3422  	MOVOA X5, -16(CX)(R12*1)
3423  	ADDQ  $0x20, R12
3424  	CMPQ  R9, R12
3425  	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
3426  	MOVOU X0, (CX)
3427  	MOVOU X1, 16(CX)
3428  	MOVOU X2, -32(CX)(R9*1)
3429  	MOVOU X3, -16(CX)(R9*1)
3430  	MOVQ  R8, CX
3431  
3432  emit_literal_done_match_emit_encodeBlockAsm12B:
3433  match_nolit_loop_encodeBlockAsm12B:
3434  	MOVL DX, DI
3435  	SUBL SI, DI
3436  	MOVL DI, 16(SP)
3437  	ADDL $0x04, DX
3438  	ADDL $0x04, SI
3439  	MOVQ src_len+32(FP), DI
3440  	SUBL DX, DI
3441  	LEAQ (BX)(DX*1), R8
3442  	LEAQ (BX)(SI*1), SI
3443  
3444  	// matchLen
3445  	XORL R10, R10
3446  
3447  matchlen_loopback_16_match_nolit_encodeBlockAsm12B:
3448  	CMPL DI, $0x10
3449  	JB   matchlen_match8_match_nolit_encodeBlockAsm12B
3450  	MOVQ (R8)(R10*1), R9
3451  	MOVQ 8(R8)(R10*1), R11
3452  	XORQ (SI)(R10*1), R9
3453  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
3454  	XORQ 8(SI)(R10*1), R11
3455  	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm12B
3456  	LEAL -16(DI), DI
3457  	LEAL 16(R10), R10
3458  	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm12B
3459  
3460  matchlen_bsf_16match_nolit_encodeBlockAsm12B:
3461  #ifdef GOAMD64_v3
3462  	TZCNTQ R11, R11
3463  
3464  #else
3465  	BSFQ R11, R11
3466  
3467  #endif
3468  	SARQ $0x03, R11
3469  	LEAL 8(R10)(R11*1), R10
3470  	JMP  match_nolit_end_encodeBlockAsm12B
3471  
3472  matchlen_match8_match_nolit_encodeBlockAsm12B:
3473  	CMPL DI, $0x08
3474  	JB   matchlen_match4_match_nolit_encodeBlockAsm12B
3475  	MOVQ (R8)(R10*1), R9
3476  	XORQ (SI)(R10*1), R9
3477  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
3478  	LEAL -8(DI), DI
3479  	LEAL 8(R10), R10
3480  	JMP  matchlen_match4_match_nolit_encodeBlockAsm12B
3481  
3482  matchlen_bsf_8_match_nolit_encodeBlockAsm12B:
3483  #ifdef GOAMD64_v3
3484  	TZCNTQ R9, R9
3485  
3486  #else
3487  	BSFQ R9, R9
3488  
3489  #endif
3490  	SARQ $0x03, R9
3491  	LEAL (R10)(R9*1), R10
3492  	JMP  match_nolit_end_encodeBlockAsm12B
3493  
3494  matchlen_match4_match_nolit_encodeBlockAsm12B:
3495  	CMPL DI, $0x04
3496  	JB   matchlen_match2_match_nolit_encodeBlockAsm12B
3497  	MOVL (R8)(R10*1), R9
3498  	CMPL (SI)(R10*1), R9
3499  	JNE  matchlen_match2_match_nolit_encodeBlockAsm12B
3500  	LEAL -4(DI), DI
3501  	LEAL 4(R10), R10
3502  
3503  matchlen_match2_match_nolit_encodeBlockAsm12B:
3504  	CMPL DI, $0x01
3505  	JE   matchlen_match1_match_nolit_encodeBlockAsm12B
3506  	JB   match_nolit_end_encodeBlockAsm12B
3507  	MOVW (R8)(R10*1), R9
3508  	CMPW (SI)(R10*1), R9
3509  	JNE  matchlen_match1_match_nolit_encodeBlockAsm12B
3510  	LEAL 2(R10), R10
3511  	SUBL $0x02, DI
3512  	JZ   match_nolit_end_encodeBlockAsm12B
3513  
3514  matchlen_match1_match_nolit_encodeBlockAsm12B:
3515  	MOVB (R8)(R10*1), R9
3516  	CMPB (SI)(R10*1), R9
3517  	JNE  match_nolit_end_encodeBlockAsm12B
3518  	LEAL 1(R10), R10
3519  
3520  match_nolit_end_encodeBlockAsm12B:
3521  	ADDL R10, DX
3522  	MOVL 16(SP), SI
3523  	ADDL $0x04, R10
3524  	MOVL DX, 12(SP)
3525  
3526  	// emitCopy
3527  	CMPL R10, $0x40
3528  	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm12B
3529  	CMPL SI, $0x00000800
3530  	JAE  long_offset_short_match_nolit_encodeBlockAsm12B
3531  	MOVL $0x00000001, DI
3532  	LEAL 16(DI), DI
3533  	MOVB SI, 1(CX)
3534  	SHRL $0x08, SI
3535  	SHLL $0x05, SI
3536  	ORL  SI, DI
3537  	MOVB DI, (CX)
3538  	ADDQ $0x02, CX
3539  	SUBL $0x08, R10
3540  
3541  	// emitRepeat
3542  	LEAL -4(R10), R10
3543  	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
3544  	MOVL R10, DI
3545  	LEAL -4(R10), R10
3546  	CMPL DI, $0x08
3547  	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
3548  	CMPL DI, $0x0c
3549  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
3550  	CMPL SI, $0x00000800
3551  	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
3552  
3553  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
3554  	CMPL R10, $0x00000104
3555  	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
3556  	LEAL -256(R10), R10
3557  	MOVW $0x0019, (CX)
3558  	MOVW R10, 2(CX)
3559  	ADDQ $0x04, CX
3560  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3561  
3562  repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
3563  	LEAL -4(R10), R10
3564  	MOVW $0x0015, (CX)
3565  	MOVB R10, 2(CX)
3566  	ADDQ $0x03, CX
3567  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3568  
3569  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
3570  	SHLL $0x02, R10
3571  	ORL  $0x01, R10
3572  	MOVW R10, (CX)
3573  	ADDQ $0x02, CX
3574  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3575  
3576  repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
3577  	XORQ DI, DI
3578  	LEAL 1(DI)(R10*4), R10
3579  	MOVB SI, 1(CX)
3580  	SARL $0x08, SI
3581  	SHLL $0x05, SI
3582  	ORL  SI, R10
3583  	MOVB R10, (CX)
3584  	ADDQ $0x02, CX
3585  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3586  
3587  long_offset_short_match_nolit_encodeBlockAsm12B:
3588  	MOVB $0xee, (CX)
3589  	MOVW SI, 1(CX)
3590  	LEAL -60(R10), R10
3591  	ADDQ $0x03, CX
3592  
3593  	// emitRepeat
3594  	MOVL R10, DI
3595  	LEAL -4(R10), R10
3596  	CMPL DI, $0x08
3597  	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
3598  	CMPL DI, $0x0c
3599  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
3600  	CMPL SI, $0x00000800
3601  	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
3602  
3603  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
3604  	CMPL R10, $0x00000104
3605  	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
3606  	LEAL -256(R10), R10
3607  	MOVW $0x0019, (CX)
3608  	MOVW R10, 2(CX)
3609  	ADDQ $0x04, CX
3610  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3611  
3612  repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
3613  	LEAL -4(R10), R10
3614  	MOVW $0x0015, (CX)
3615  	MOVB R10, 2(CX)
3616  	ADDQ $0x03, CX
3617  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3618  
3619  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
3620  	SHLL $0x02, R10
3621  	ORL  $0x01, R10
3622  	MOVW R10, (CX)
3623  	ADDQ $0x02, CX
3624  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3625  
3626  repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
3627  	XORQ DI, DI
3628  	LEAL 1(DI)(R10*4), R10
3629  	MOVB SI, 1(CX)
3630  	SARL $0x08, SI
3631  	SHLL $0x05, SI
3632  	ORL  SI, R10
3633  	MOVB R10, (CX)
3634  	ADDQ $0x02, CX
3635  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3636  
3637  two_byte_offset_short_match_nolit_encodeBlockAsm12B:
3638  	MOVL R10, DI
3639  	SHLL $0x02, DI
3640  	CMPL R10, $0x0c
3641  	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
3642  	CMPL SI, $0x00000800
3643  	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
3644  	LEAL -15(DI), DI
3645  	MOVB SI, 1(CX)
3646  	SHRL $0x08, SI
3647  	SHLL $0x05, SI
3648  	ORL  SI, DI
3649  	MOVB DI, (CX)
3650  	ADDQ $0x02, CX
3651  	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
3652  
3653  emit_copy_three_match_nolit_encodeBlockAsm12B:
3654  	LEAL -2(DI), DI
3655  	MOVB DI, (CX)
3656  	MOVW SI, 1(CX)
3657  	ADDQ $0x03, CX
3658  
3659  match_nolit_emitcopy_end_encodeBlockAsm12B: // join point after the copy/repeat has been written
3660  	CMPL DX, 8(SP) // 8(SP) is set outside this chunk; presumably the search limit - confirm
3661  	JAE  emit_remainder_encodeBlockAsm12B // DX at or past the limit: emit the tail
3662  	MOVQ -2(BX)(DX*1), DI // DI = 8 bytes starting at index DX-2
3663  	CMPQ CX, (SP) // compare the write cursor with the pointer stored in (SP)
3664  	JB   match_nolit_dst_ok_encodeBlockAsm12B
3665  	MOVQ $0x00000000, ret+56(FP) // cursor not below the limit: return 0
3666  	RET
3667  
3668  match_nolit_dst_ok_encodeBlockAsm12B: // hash the bytes at DX-2 and DX, update the table, and test for an immediate follow-up match
3669  	MOVQ  $0x000000cf1bbcdcbb, R9 // multiplier for the hash
3670  	MOVQ  DI, R8 // R8 = 8 bytes starting at index DX-2
3671  	SHRQ  $0x10, DI // DI = bytes starting at index DX
3672  	MOVQ  DI, SI
3673  	SHLQ  $0x18, R8 // discard the top 3 bytes so 5 bytes feed the hash
3674  	IMULQ R9, R8
3675  	SHRQ  $0x34, R8 // keep the top 12 bits: R8 = table index for DX-2
3676  	SHLQ  $0x18, SI
3677  	IMULQ R9, SI
3678  	SHRQ  $0x34, SI // SI = table index for DX
3679  	LEAL  -2(DX), R9 // R9 = DX - 2
3680  	LEAQ  (AX)(SI*4), R10 // R10 = address of the table slot for DX
3681  	MOVL  (R10), SI // SI = previous entry in that slot (candidate index)
3682  	MOVL  R9, (AX)(R8*4) // table[hash(DX-2)] = DX - 2
3683  	MOVL  DX, (R10) // table[hash(DX)] = DX
3684  	CMPL  (BX)(SI*1), DI // compare 4 bytes at the candidate with the 4 bytes at DX
3685  	JEQ   match_nolit_loop_encodeBlockAsm12B // equal: handle another match right away with DX and SI as they are
3686  	INCL  DX // otherwise move on by one byte
3687  	JMP   search_loop_encodeBlockAsm12B
3688  
3689  emit_remainder_encodeBlockAsm12B: // bounds check before emitting everything from 12(SP) to the end of src
3690  	MOVQ src_len+32(FP), AX
3691  	SUBL 12(SP), AX // AX = src_len - 12(SP) (32-bit subtract)
3692  	LEAQ 3(CX)(AX*1), AX // AX = CX + AX + 3
3693  	CMPQ AX, (SP) // compare against the pointer stored in (SP)
3694  	JB   emit_remainder_ok_encodeBlockAsm12B
3695  	MOVQ $0x00000000, ret+56(FP) // not below the limit: return 0
3696  	RET
3697  
3698  emit_remainder_ok_encodeBlockAsm12B:
3699  	MOVQ src_len+32(FP), AX
3700  	MOVL 12(SP), DX
3701  	CMPL DX, AX
3702  	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm12B
3703  	MOVL AX, SI
3704  	MOVL AX, 12(SP)
3705  	LEAQ (BX)(DX*1), AX
3706  	SUBL DX, SI
3707  	LEAL -1(SI), DX
3708  	CMPL DX, $0x3c
3709  	JB   one_byte_emit_remainder_encodeBlockAsm12B
3710  	CMPL DX, $0x00000100
3711  	JB   two_bytes_emit_remainder_encodeBlockAsm12B
3712  	JB   three_bytes_emit_remainder_encodeBlockAsm12B
3713  
3714  three_bytes_emit_remainder_encodeBlockAsm12B:
3715  	MOVB $0xf4, (CX)
3716  	MOVW DX, 1(CX)
3717  	ADDQ $0x03, CX
3718  	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
3719  
3720  two_bytes_emit_remainder_encodeBlockAsm12B:
3721  	MOVB $0xf0, (CX)
3722  	MOVB DL, 1(CX)
3723  	ADDQ $0x02, CX
3724  	CMPL DX, $0x40
3725  	JB   memmove_emit_remainder_encodeBlockAsm12B
3726  	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
3727  
3728  one_byte_emit_remainder_encodeBlockAsm12B:
3729  	SHLB $0x02, DL
3730  	MOVB DL, (CX)
3731  	ADDQ $0x01, CX
3732  
3733  memmove_emit_remainder_encodeBlockAsm12B:
3734  	LEAQ (CX)(SI*1), DX
3735  	MOVL SI, BX
3736  
3737  	// genMemMoveShort
3738  	CMPQ BX, $0x03
3739  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
3740  	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
3741  	CMPQ BX, $0x08
3742  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7
3743  	CMPQ BX, $0x10
3744  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
3745  	CMPQ BX, $0x20
3746  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
3747  	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
3748  
3749  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
3750  	MOVB (AX), SI
3751  	MOVB -1(AX)(BX*1), AL
3752  	MOVB SI, (CX)
3753  	MOVB AL, -1(CX)(BX*1)
3754  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
3755  
3756  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
3757  	MOVW (AX), SI
3758  	MOVB 2(AX), AL
3759  	MOVW SI, (CX)
3760  	MOVB AL, 2(CX)
3761  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
3762  
3763  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
3764  	MOVL (AX), SI
3765  	MOVL -4(AX)(BX*1), AX
3766  	MOVL SI, (CX)
3767  	MOVL AX, -4(CX)(BX*1)
3768  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
3769  
3770  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
3771  	MOVQ (AX), SI
3772  	MOVQ -8(AX)(BX*1), AX
3773  	MOVQ SI, (CX)
3774  	MOVQ AX, -8(CX)(BX*1)
3775  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
3776  
3777  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
3778  	MOVOU (AX), X0
3779  	MOVOU -16(AX)(BX*1), X1
3780  	MOVOU X0, (CX)
3781  	MOVOU X1, -16(CX)(BX*1)
3782  	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm12B
3783  
3784  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
3785  	MOVOU (AX), X0
3786  	MOVOU 16(AX), X1
3787  	MOVOU -32(AX)(BX*1), X2
3788  	MOVOU -16(AX)(BX*1), X3
3789  	MOVOU X0, (CX)
3790  	MOVOU X1, 16(CX)
3791  	MOVOU X2, -32(CX)(BX*1)
3792  	MOVOU X3, -16(CX)(BX*1)
3793  
3794  memmove_end_copy_emit_remainder_encodeBlockAsm12B:
3795  	MOVQ DX, CX
3796  	JMP  emit_literal_done_emit_remainder_encodeBlockAsm12B
3797  
3798  memmove_long_emit_remainder_encodeBlockAsm12B:
3799  	LEAQ (CX)(SI*1), DX
3800  	MOVL SI, BX
3801  
3802  	// genMemMoveLong
3803  	MOVOU (AX), X0
3804  	MOVOU 16(AX), X1
3805  	MOVOU -32(AX)(BX*1), X2
3806  	MOVOU -16(AX)(BX*1), X3
3807  	MOVQ  BX, DI
3808  	SHRQ  $0x05, DI
3809  	MOVQ  CX, SI
3810  	ANDL  $0x0000001f, SI
3811  	MOVQ  $0x00000040, R8
3812  	SUBQ  SI, R8
3813  	DECQ  DI
3814  	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
3815  	LEAQ  -32(AX)(R8*1), SI
3816  	LEAQ  -32(CX)(R8*1), R9
3817  
3818  emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
3819  	MOVOU (SI), X4
3820  	MOVOU 16(SI), X5
3821  	MOVOA X4, (R9)
3822  	MOVOA X5, 16(R9)
3823  	ADDQ  $0x20, R9
3824  	ADDQ  $0x20, SI
3825  	ADDQ  $0x20, R8
3826  	DECQ  DI
3827  	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
3828  
3829  emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
3830  	MOVOU -32(AX)(R8*1), X4
3831  	MOVOU -16(AX)(R8*1), X5
3832  	MOVOA X4, -32(CX)(R8*1)
3833  	MOVOA X5, -16(CX)(R8*1)
3834  	ADDQ  $0x20, R8
3835  	CMPQ  BX, R8
3836  	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
3837  	MOVOU X0, (CX)
3838  	MOVOU X1, 16(CX)
3839  	MOVOU X2, -32(CX)(BX*1)
3840  	MOVOU X3, -16(CX)(BX*1)
3841  	MOVQ  DX, CX
3842  
3843  emit_literal_done_emit_remainder_encodeBlockAsm12B: // final exit: return the number of bytes written
3844  	MOVQ dst_base+0(FP), AX
3845  	SUBQ AX, CX // CX = write cursor - dst_base
3846  	MOVQ CX, ret+56(FP)
3847  	RET
3848  
3849  // func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
3850  // Requires: BMI, SSE2
3851  TEXT ·encodeBlockAsm10B(SB), $24-64 // 24-byte frame; (SP), 8(SP), 12(SP), 16(SP) and 20(SP) are used as locals
3852  	MOVQ tmp+48(FP), AX // AX = tmp, base of the table cleared below
3853  	MOVQ dst_base+0(FP), CX // CX = dst write cursor
3854  	MOVQ $0x00000020, DX // 32 iterations x 128 bytes = 4096 bytes cleared
3855  	MOVQ AX, BX
3856  	PXOR X0, X0 // X0 = 0
3857  
3858  zero_loop_encodeBlockAsm10B: // store 8 x 16 zero bytes per iteration
3859  	MOVOU X0, (BX)
3860  	MOVOU X0, 16(BX)
3861  	MOVOU X0, 32(BX)
3862  	MOVOU X0, 48(BX)
3863  	MOVOU X0, 64(BX)
3864  	MOVOU X0, 80(BX)
3865  	MOVOU X0, 96(BX)
3866  	MOVOU X0, 112(BX)
3867  	ADDQ  $0x80, BX
3868  	DECQ  DX
3869  	JNZ   zero_loop_encodeBlockAsm10B
3870  	MOVL  $0x00000000, 12(SP) // 12(SP) = 0: index where not-yet-emitted bytes start
3871  	MOVQ  src_len+32(FP), DX
3872  	LEAQ  -9(DX), BX // BX = src_len - 9
3873  	LEAQ  -8(DX), SI
3874  	MOVL  SI, 8(SP) // 8(SP) = src_len - 8, compared against in the search loop
3875  	SHRQ  $0x05, DX // DX = src_len / 32
3876  	SUBL  DX, BX // BX = src_len - 9 - src_len/32
3877  	LEAQ  (CX)(BX*1), BX
3878  	MOVQ  BX, (SP) // (SP) = dst_base + BX, the limit the write cursor is checked against
3879  	MOVL  $0x00000001, DX // DX = 1: starting index into src
3880  	MOVL  DX, 16(SP) // 16(SP) = 1: initial offset used by the repeat check
3881  	MOVQ  src_base+24(FP), BX // BX = src base pointer from here on
3882  
3883  search_loop_encodeBlockAsm10B: // one search step at index DX: hash, update the table, test for a repeat at DX+1
3884  	MOVL  DX, SI
3885  	SUBL  12(SP), SI // SI = DX - 12(SP)
3886  	SHRL  $0x05, SI
3887  	LEAL  4(DX)(SI*1), SI // SI = DX + 4 + (DX - 12(SP))/32: next index to try
3888  	CMPL  SI, 8(SP)
3889  	JAE   emit_remainder_encodeBlockAsm10B // next index >= src_len - 8: stop searching
3890  	MOVQ  (BX)(DX*1), DI // DI = 8 bytes at index DX
3891  	MOVL  SI, 20(SP) // save the next index
3892  	MOVQ  $0x9e3779b1, R9 // multiplier for the hash
3893  	MOVQ  DI, R10
3894  	MOVQ  DI, R11
3895  	SHRQ  $0x08, R11 // R11 = bytes starting at DX+1
3896  	SHLQ  $0x20, R10 // keep only the low 4 bytes for hashing
3897  	IMULQ R9, R10
3898  	SHRQ  $0x36, R10 // keep the top 10 bits: table index for DX
3899  	SHLQ  $0x20, R11
3900  	IMULQ R9, R11
3901  	SHRQ  $0x36, R11 // table index for DX+1
3902  	MOVL  (AX)(R10*4), SI // SI = previous entry for hash(DX)
3903  	MOVL  (AX)(R11*4), R8 // R8 = previous entry for hash(DX+1)
3904  	MOVL  DX, (AX)(R10*4) // table[hash(DX)] = DX
3905  	LEAL  1(DX), R10
3906  	MOVL  R10, (AX)(R11*4) // table[hash(DX+1)] = DX + 1
3907  	MOVQ  DI, R10
3908  	SHRQ  $0x10, R10 // bytes starting at DX+2
3909  	SHLQ  $0x20, R10
3910  	IMULQ R9, R10
3911  	SHRQ  $0x36, R10 // R10 = table index for DX+2
3912  	MOVL  DX, R9
3913  	SUBL  16(SP), R9 // R9 = DX - offset held in 16(SP)
3914  	MOVL  1(BX)(R9*1), R11 // R11 = 4 bytes at index DX + 1 - offset
3915  	MOVQ  DI, R9
3916  	SHRQ  $0x08, R9 // R9 = bytes starting at DX+1
3917  	CMPL  R9, R11 // compare the 4 bytes at DX+1 with those one offset back
3918  	JNE   no_repeat_found_encodeBlockAsm10B
3919  	LEAL  1(DX), DI // repeat found: DI = DX + 1, its start index
3920  	MOVL  12(SP), R8 // R8 = 12(SP): lower bound for the backward extension
3921  	MOVL  DI, SI
3922  	SUBL  16(SP), SI // SI = DI - offset: index being repeated
3923  	JZ    repeat_extend_back_end_encodeBlockAsm10B // SI == 0: nothing before it to compare
3924  
3925  repeat_extend_back_loop_encodeBlockAsm10B: // extend the repeat backwards while the preceding bytes agree
3926  	CMPL DI, R8
3927  	JBE  repeat_extend_back_end_encodeBlockAsm10B // stop once DI <= R8 (unsigned)
3928  	MOVB -1(BX)(SI*1), R9 // byte before the repeated position
3929  	MOVB -1(BX)(DI*1), R10 // byte before the current position
3930  	CMPB R9, R10
3931  	JNE  repeat_extend_back_end_encodeBlockAsm10B // bytes differ: stop
3932  	LEAL -1(DI), DI // DI-- (LEAL leaves the flags untouched)
3933  	DECL SI // SI--; sets ZF for the test below
3934  	JNZ  repeat_extend_back_loop_encodeBlockAsm10B // keep going until SI reaches 0
3935  
3936  repeat_extend_back_end_encodeBlockAsm10B: // bounds check before emitting DI - 12(SP) bytes
3937  	MOVL DI, SI
3938  	SUBL 12(SP), SI // SI = DI - 12(SP)
3939  	LEAQ 3(CX)(SI*1), SI // SI = CX + SI + 3
3940  	CMPQ SI, (SP) // compare against the limit stored in (SP)
3941  	JB   repeat_dst_size_check_encodeBlockAsm10B
3942  	MOVQ $0x00000000, ret+56(FP) // not below the limit: return 0
3943  	RET
3944  
3945  repeat_dst_size_check_encodeBlockAsm10B:
3946  	MOVL 12(SP), SI
3947  	CMPL SI, DI
3948  	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm10B
3949  	MOVL DI, R9
3950  	MOVL DI, 12(SP)
3951  	LEAQ (BX)(SI*1), R10
3952  	SUBL SI, R9
3953  	LEAL -1(R9), SI
3954  	CMPL SI, $0x3c
3955  	JB   one_byte_repeat_emit_encodeBlockAsm10B
3956  	CMPL SI, $0x00000100
3957  	JB   two_bytes_repeat_emit_encodeBlockAsm10B
3958  	JB   three_bytes_repeat_emit_encodeBlockAsm10B
3959  
3960  three_bytes_repeat_emit_encodeBlockAsm10B:
3961  	MOVB $0xf4, (CX)
3962  	MOVW SI, 1(CX)
3963  	ADDQ $0x03, CX
3964  	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
3965  
3966  two_bytes_repeat_emit_encodeBlockAsm10B:
3967  	MOVB $0xf0, (CX)
3968  	MOVB SI, 1(CX)
3969  	ADDQ $0x02, CX
3970  	CMPL SI, $0x40
3971  	JB   memmove_repeat_emit_encodeBlockAsm10B
3972  	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
3973  
3974  one_byte_repeat_emit_encodeBlockAsm10B:
3975  	SHLB $0x02, SI
3976  	MOVB SI, (CX)
3977  	ADDQ $0x01, CX
3978  
3979  memmove_repeat_emit_encodeBlockAsm10B:
3980  	LEAQ (CX)(R9*1), SI
3981  
3982  	// genMemMoveShort
3983  	CMPQ R9, $0x08
3984  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
3985  	CMPQ R9, $0x10
3986  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
3987  	CMPQ R9, $0x20
3988  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
3989  	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
3990  
3991  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
3992  	MOVQ (R10), R11
3993  	MOVQ R11, (CX)
3994  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
3995  
3996  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
3997  	MOVQ (R10), R11
3998  	MOVQ -8(R10)(R9*1), R10
3999  	MOVQ R11, (CX)
4000  	MOVQ R10, -8(CX)(R9*1)
4001  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
4002  
4003  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
4004  	MOVOU (R10), X0
4005  	MOVOU -16(R10)(R9*1), X1
4006  	MOVOU X0, (CX)
4007  	MOVOU X1, -16(CX)(R9*1)
4008  	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm10B
4009  
4010  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
4011  	MOVOU (R10), X0
4012  	MOVOU 16(R10), X1
4013  	MOVOU -32(R10)(R9*1), X2
4014  	MOVOU -16(R10)(R9*1), X3
4015  	MOVOU X0, (CX)
4016  	MOVOU X1, 16(CX)
4017  	MOVOU X2, -32(CX)(R9*1)
4018  	MOVOU X3, -16(CX)(R9*1)
4019  
4020  memmove_end_copy_repeat_emit_encodeBlockAsm10B:
4021  	MOVQ SI, CX
4022  	JMP  emit_literal_done_repeat_emit_encodeBlockAsm10B
4023  
4024  memmove_long_repeat_emit_encodeBlockAsm10B:
4025  	LEAQ (CX)(R9*1), SI
4026  
4027  	// genMemMoveLong
4028  	MOVOU (R10), X0
4029  	MOVOU 16(R10), X1
4030  	MOVOU -32(R10)(R9*1), X2
4031  	MOVOU -16(R10)(R9*1), X3
4032  	MOVQ  R9, R12
4033  	SHRQ  $0x05, R12
4034  	MOVQ  CX, R11
4035  	ANDL  $0x0000001f, R11
4036  	MOVQ  $0x00000040, R13
4037  	SUBQ  R11, R13
4038  	DECQ  R12
4039  	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
4040  	LEAQ  -32(R10)(R13*1), R11
4041  	LEAQ  -32(CX)(R13*1), R14
4042  
4043  emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
4044  	MOVOU (R11), X4
4045  	MOVOU 16(R11), X5
4046  	MOVOA X4, (R14)
4047  	MOVOA X5, 16(R14)
4048  	ADDQ  $0x20, R14
4049  	ADDQ  $0x20, R11
4050  	ADDQ  $0x20, R13
4051  	DECQ  R12
4052  	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
4053  
4054  emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
4055  	MOVOU -32(R10)(R13*1), X4
4056  	MOVOU -16(R10)(R13*1), X5
4057  	MOVOA X4, -32(CX)(R13*1)
4058  	MOVOA X5, -16(CX)(R13*1)
4059  	ADDQ  $0x20, R13
4060  	CMPQ  R9, R13
4061  	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
4062  	MOVOU X0, (CX)
4063  	MOVOU X1, 16(CX)
4064  	MOVOU X2, -32(CX)(R9*1)
4065  	MOVOU X3, -16(CX)(R9*1)
4066  	MOVQ  SI, CX
4067  
4068  emit_literal_done_repeat_emit_encodeBlockAsm10B:
4069  	ADDL $0x05, DX
4070  	MOVL DX, SI
4071  	SUBL 16(SP), SI
4072  	MOVQ src_len+32(FP), R9
4073  	SUBL DX, R9
4074  	LEAQ (BX)(DX*1), R10
4075  	LEAQ (BX)(SI*1), SI
4076  
4077  	// matchLen
4078  	XORL R12, R12
4079  
4080  matchlen_loopback_16_repeat_extend_encodeBlockAsm10B:
4081  	CMPL R9, $0x10
4082  	JB   matchlen_match8_repeat_extend_encodeBlockAsm10B
4083  	MOVQ (R10)(R12*1), R11
4084  	MOVQ 8(R10)(R12*1), R13
4085  	XORQ (SI)(R12*1), R11
4086  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
4087  	XORQ 8(SI)(R12*1), R13
4088  	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm10B
4089  	LEAL -16(R9), R9
4090  	LEAL 16(R12), R12
4091  	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm10B
4092  
4093  matchlen_bsf_16repeat_extend_encodeBlockAsm10B:
4094  #ifdef GOAMD64_v3
4095  	TZCNTQ R13, R13
4096  
4097  #else
4098  	BSFQ R13, R13
4099  
4100  #endif
4101  	SARQ $0x03, R13
4102  	LEAL 8(R12)(R13*1), R12
4103  	JMP  repeat_extend_forward_end_encodeBlockAsm10B
4104  
4105  matchlen_match8_repeat_extend_encodeBlockAsm10B:
4106  	CMPL R9, $0x08
4107  	JB   matchlen_match4_repeat_extend_encodeBlockAsm10B
4108  	MOVQ (R10)(R12*1), R11
4109  	XORQ (SI)(R12*1), R11
4110  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
4111  	LEAL -8(R9), R9
4112  	LEAL 8(R12), R12
4113  	JMP  matchlen_match4_repeat_extend_encodeBlockAsm10B
4114  
4115  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B:
4116  #ifdef GOAMD64_v3
4117  	TZCNTQ R11, R11
4118  
4119  #else
4120  	BSFQ R11, R11
4121  
4122  #endif
4123  	SARQ $0x03, R11
4124  	LEAL (R12)(R11*1), R12
4125  	JMP  repeat_extend_forward_end_encodeBlockAsm10B
4126  
4127  matchlen_match4_repeat_extend_encodeBlockAsm10B:
4128  	CMPL R9, $0x04
4129  	JB   matchlen_match2_repeat_extend_encodeBlockAsm10B
4130  	MOVL (R10)(R12*1), R11
4131  	CMPL (SI)(R12*1), R11
4132  	JNE  matchlen_match2_repeat_extend_encodeBlockAsm10B
4133  	LEAL -4(R9), R9
4134  	LEAL 4(R12), R12
4135  
4136  matchlen_match2_repeat_extend_encodeBlockAsm10B:
4137  	CMPL R9, $0x01
4138  	JE   matchlen_match1_repeat_extend_encodeBlockAsm10B
4139  	JB   repeat_extend_forward_end_encodeBlockAsm10B
4140  	MOVW (R10)(R12*1), R11
4141  	CMPW (SI)(R12*1), R11
4142  	JNE  matchlen_match1_repeat_extend_encodeBlockAsm10B
4143  	LEAL 2(R12), R12
4144  	SUBL $0x02, R9
4145  	JZ   repeat_extend_forward_end_encodeBlockAsm10B
4146  
4147  matchlen_match1_repeat_extend_encodeBlockAsm10B:
4148  	MOVB (R10)(R12*1), R11
4149  	CMPB (SI)(R12*1), R11
4150  	JNE  repeat_extend_forward_end_encodeBlockAsm10B
4151  	LEAL 1(R12), R12
4152  
4153  repeat_extend_forward_end_encodeBlockAsm10B:
4154  	ADDL  R12, DX
4155  	MOVL  DX, SI
4156  	SUBL  DI, SI
4157  	MOVL  16(SP), DI
4158  	TESTL R8, R8
4159  	JZ    repeat_as_copy_encodeBlockAsm10B
4160  
4161  	// emitRepeat
4162  	MOVL SI, R8
4163  	LEAL -4(SI), SI
4164  	CMPL R8, $0x08
4165  	JBE  repeat_two_match_repeat_encodeBlockAsm10B
4166  	CMPL R8, $0x0c
4167  	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
4168  	CMPL DI, $0x00000800
4169  	JB   repeat_two_offset_match_repeat_encodeBlockAsm10B
4170  
4171  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
4172  	CMPL SI, $0x00000104
4173  	JB   repeat_three_match_repeat_encodeBlockAsm10B
4174  	LEAL -256(SI), SI
4175  	MOVW $0x0019, (CX)
4176  	MOVW SI, 2(CX)
4177  	ADDQ $0x04, CX
4178  	JMP  repeat_end_emit_encodeBlockAsm10B
4179  
4180  repeat_three_match_repeat_encodeBlockAsm10B:
4181  	LEAL -4(SI), SI
4182  	MOVW $0x0015, (CX)
4183  	MOVB SI, 2(CX)
4184  	ADDQ $0x03, CX
4185  	JMP  repeat_end_emit_encodeBlockAsm10B
4186  
4187  repeat_two_match_repeat_encodeBlockAsm10B:
4188  	SHLL $0x02, SI
4189  	ORL  $0x01, SI
4190  	MOVW SI, (CX)
4191  	ADDQ $0x02, CX
4192  	JMP  repeat_end_emit_encodeBlockAsm10B
4193  
4194  repeat_two_offset_match_repeat_encodeBlockAsm10B:
4195  	XORQ R8, R8
4196  	LEAL 1(R8)(SI*4), SI
4197  	MOVB DI, 1(CX)
4198  	SARL $0x08, DI
4199  	SHLL $0x05, DI
4200  	ORL  DI, SI
4201  	MOVB SI, (CX)
4202  	ADDQ $0x02, CX
4203  	JMP  repeat_end_emit_encodeBlockAsm10B
4204  
4205  repeat_as_copy_encodeBlockAsm10B:
4206  	// emitCopy
4207  	CMPL SI, $0x40
4208  	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
4209  	CMPL DI, $0x00000800
4210  	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm10B
4211  	MOVL $0x00000001, R8
4212  	LEAL 16(R8), R8
4213  	MOVB DI, 1(CX)
4214  	SHRL $0x08, DI
4215  	SHLL $0x05, DI
4216  	ORL  DI, R8
4217  	MOVB R8, (CX)
4218  	ADDQ $0x02, CX
4219  	SUBL $0x08, SI
4220  
4221  	// emitRepeat
4222  	LEAL -4(SI), SI
4223  	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
4224  	MOVL SI, R8
4225  	LEAL -4(SI), SI
4226  	CMPL R8, $0x08
4227  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
4228  	CMPL R8, $0x0c
4229  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
4230  	CMPL DI, $0x00000800
4231  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
4232  
4233  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
4234  	CMPL SI, $0x00000104
4235  	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
4236  	LEAL -256(SI), SI
4237  	MOVW $0x0019, (CX)
4238  	MOVW SI, 2(CX)
4239  	ADDQ $0x04, CX
4240  	JMP  repeat_end_emit_encodeBlockAsm10B
4241  
4242  repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
4243  	LEAL -4(SI), SI
4244  	MOVW $0x0015, (CX)
4245  	MOVB SI, 2(CX)
4246  	ADDQ $0x03, CX
4247  	JMP  repeat_end_emit_encodeBlockAsm10B
4248  
4249  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
4250  	SHLL $0x02, SI
4251  	ORL  $0x01, SI
4252  	MOVW SI, (CX)
4253  	ADDQ $0x02, CX
4254  	JMP  repeat_end_emit_encodeBlockAsm10B
4255  
4256  repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
4257  	XORQ R8, R8
4258  	LEAL 1(R8)(SI*4), SI
4259  	MOVB DI, 1(CX)
4260  	SARL $0x08, DI
4261  	SHLL $0x05, DI
4262  	ORL  DI, SI
4263  	MOVB SI, (CX)
4264  	ADDQ $0x02, CX
4265  	JMP  repeat_end_emit_encodeBlockAsm10B
4266  
4267  long_offset_short_repeat_as_copy_encodeBlockAsm10B:
4268  	MOVB $0xee, (CX)
4269  	MOVW DI, 1(CX)
4270  	LEAL -60(SI), SI
4271  	ADDQ $0x03, CX
4272  
4273  	// emitRepeat
4274  	MOVL SI, R8
4275  	LEAL -4(SI), SI
4276  	CMPL R8, $0x08
4277  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
4278  	CMPL R8, $0x0c
4279  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
4280  	CMPL DI, $0x00000800
4281  	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
4282  
4283  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
4284  	CMPL SI, $0x00000104
4285  	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
4286  	LEAL -256(SI), SI
4287  	MOVW $0x0019, (CX)
4288  	MOVW SI, 2(CX)
4289  	ADDQ $0x04, CX
4290  	JMP  repeat_end_emit_encodeBlockAsm10B
4291  
4292  repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
4293  	LEAL -4(SI), SI
4294  	MOVW $0x0015, (CX)
4295  	MOVB SI, 2(CX)
4296  	ADDQ $0x03, CX
4297  	JMP  repeat_end_emit_encodeBlockAsm10B
4298  
4299  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
4300  	SHLL $0x02, SI
4301  	ORL  $0x01, SI
4302  	MOVW SI, (CX)
4303  	ADDQ $0x02, CX
4304  	JMP  repeat_end_emit_encodeBlockAsm10B
4305  
4306  repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
4307  	XORQ R8, R8
4308  	LEAL 1(R8)(SI*4), SI
4309  	MOVB DI, 1(CX)
4310  	SARL $0x08, DI
4311  	SHLL $0x05, DI
4312  	ORL  DI, SI
4313  	MOVB SI, (CX)
4314  	ADDQ $0x02, CX
4315  	JMP  repeat_end_emit_encodeBlockAsm10B
4316  
4317  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
4318  	MOVL SI, R8
4319  	SHLL $0x02, R8
4320  	CMPL SI, $0x0c
4321  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
4322  	CMPL DI, $0x00000800
4323  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
4324  	LEAL -15(R8), R8
4325  	MOVB DI, 1(CX)
4326  	SHRL $0x08, DI
4327  	SHLL $0x05, DI
4328  	ORL  DI, R8
4329  	MOVB R8, (CX)
4330  	ADDQ $0x02, CX
4331  	JMP  repeat_end_emit_encodeBlockAsm10B
4332  
4333  emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
4334  	LEAL -2(R8), R8
4335  	MOVB R8, (CX)
4336  	MOVW DI, 1(CX)
4337  	ADDQ $0x03, CX
4338  
4339  repeat_end_emit_encodeBlockAsm10B:
4340  	MOVL DX, 12(SP)
4341  	JMP  search_loop_encodeBlockAsm10B
4342  
4343  no_repeat_found_encodeBlockAsm10B:
4344  	CMPL (BX)(SI*1), DI
4345  	JEQ  candidate_match_encodeBlockAsm10B
4346  	SHRQ $0x08, DI
4347  	MOVL (AX)(R10*4), SI
4348  	LEAL 2(DX), R9
4349  	CMPL (BX)(R8*1), DI
4350  	JEQ  candidate2_match_encodeBlockAsm10B
4351  	MOVL R9, (AX)(R10*4)
4352  	SHRQ $0x08, DI
4353  	CMPL (BX)(SI*1), DI
4354  	JEQ  candidate3_match_encodeBlockAsm10B
4355  	MOVL 20(SP), DX
4356  	JMP  search_loop_encodeBlockAsm10B
4357  
4358  candidate3_match_encodeBlockAsm10B:
4359  	ADDL $0x02, DX
4360  	JMP  candidate_match_encodeBlockAsm10B
4361  
4362  candidate2_match_encodeBlockAsm10B:
4363  	MOVL R9, (AX)(R10*4)
4364  	INCL DX
4365  	MOVL R8, SI
4366  
4367  candidate_match_encodeBlockAsm10B:
4368  	MOVL  12(SP), DI
4369  	TESTL SI, SI
4370  	JZ    match_extend_back_end_encodeBlockAsm10B
4371  
4372  match_extend_back_loop_encodeBlockAsm10B:
4373  	CMPL DX, DI
4374  	JBE  match_extend_back_end_encodeBlockAsm10B
4375  	MOVB -1(BX)(SI*1), R8
4376  	MOVB -1(BX)(DX*1), R9
4377  	CMPB R8, R9
4378  	JNE  match_extend_back_end_encodeBlockAsm10B
4379  	LEAL -1(DX), DX
4380  	DECL SI
4381  	JZ   match_extend_back_end_encodeBlockAsm10B
4382  	JMP  match_extend_back_loop_encodeBlockAsm10B
4383  
4384  match_extend_back_end_encodeBlockAsm10B:
4385  	MOVL DX, DI
4386  	SUBL 12(SP), DI
4387  	LEAQ 3(CX)(DI*1), DI
4388  	CMPQ DI, (SP)
4389  	JB   match_dst_size_check_encodeBlockAsm10B
4390  	MOVQ $0x00000000, ret+56(FP)
4391  	RET
4392  
4393  match_dst_size_check_encodeBlockAsm10B:
4394  	MOVL DX, DI
4395  	MOVL 12(SP), R8
4396  	CMPL R8, DI
4397  	JEQ  emit_literal_done_match_emit_encodeBlockAsm10B
4398  	MOVL DI, R9
4399  	MOVL DI, 12(SP)
4400  	LEAQ (BX)(R8*1), DI
4401  	SUBL R8, R9
4402  	LEAL -1(R9), R8
4403  	CMPL R8, $0x3c
4404  	JB   one_byte_match_emit_encodeBlockAsm10B
4405  	CMPL R8, $0x00000100
4406  	JB   two_bytes_match_emit_encodeBlockAsm10B
4407  	JB   three_bytes_match_emit_encodeBlockAsm10B
4408  
4409  three_bytes_match_emit_encodeBlockAsm10B:
4410  	MOVB $0xf4, (CX)
4411  	MOVW R8, 1(CX)
4412  	ADDQ $0x03, CX
4413  	JMP  memmove_long_match_emit_encodeBlockAsm10B
4414  
4415  two_bytes_match_emit_encodeBlockAsm10B:
4416  	MOVB $0xf0, (CX)
4417  	MOVB R8, 1(CX)
4418  	ADDQ $0x02, CX
4419  	CMPL R8, $0x40
4420  	JB   memmove_match_emit_encodeBlockAsm10B
4421  	JMP  memmove_long_match_emit_encodeBlockAsm10B
4422  
4423  one_byte_match_emit_encodeBlockAsm10B:
4424  	SHLB $0x02, R8
4425  	MOVB R8, (CX)
4426  	ADDQ $0x01, CX
4427  
4428  memmove_match_emit_encodeBlockAsm10B:
4429  	LEAQ (CX)(R9*1), R8
4430  
4431  	// genMemMoveShort
4432  	CMPQ R9, $0x08
4433  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
4434  	CMPQ R9, $0x10
4435  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
4436  	CMPQ R9, $0x20
4437  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
4438  	JMP  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
4439  
4440  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
4441  	MOVQ (DI), R10
4442  	MOVQ R10, (CX)
4443  	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
4444  
4445  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
4446  	MOVQ (DI), R10
4447  	MOVQ -8(DI)(R9*1), DI
4448  	MOVQ R10, (CX)
4449  	MOVQ DI, -8(CX)(R9*1)
4450  	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
4451  
4452  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
4453  	MOVOU (DI), X0
4454  	MOVOU -16(DI)(R9*1), X1
4455  	MOVOU X0, (CX)
4456  	MOVOU X1, -16(CX)(R9*1)
4457  	JMP   memmove_end_copy_match_emit_encodeBlockAsm10B
4458  
4459  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
4460  	MOVOU (DI), X0
4461  	MOVOU 16(DI), X1
4462  	MOVOU -32(DI)(R9*1), X2
4463  	MOVOU -16(DI)(R9*1), X3
4464  	MOVOU X0, (CX)
4465  	MOVOU X1, 16(CX)
4466  	MOVOU X2, -32(CX)(R9*1)
4467  	MOVOU X3, -16(CX)(R9*1)
4468  
4469  memmove_end_copy_match_emit_encodeBlockAsm10B:
4470  	MOVQ R8, CX
4471  	JMP  emit_literal_done_match_emit_encodeBlockAsm10B
4472  
4473  memmove_long_match_emit_encodeBlockAsm10B:
4474  	LEAQ (CX)(R9*1), R8
4475  
4476  	// genMemMoveLong
4477  	MOVOU (DI), X0
4478  	MOVOU 16(DI), X1
4479  	MOVOU -32(DI)(R9*1), X2
4480  	MOVOU -16(DI)(R9*1), X3
4481  	MOVQ  R9, R11
4482  	SHRQ  $0x05, R11
4483  	MOVQ  CX, R10
4484  	ANDL  $0x0000001f, R10
4485  	MOVQ  $0x00000040, R12
4486  	SUBQ  R10, R12
4487  	DECQ  R11
4488  	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
4489  	LEAQ  -32(DI)(R12*1), R10
4490  	LEAQ  -32(CX)(R12*1), R13
4491  
4492  emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
4493  	MOVOU (R10), X4
4494  	MOVOU 16(R10), X5
4495  	MOVOA X4, (R13)
4496  	MOVOA X5, 16(R13)
4497  	ADDQ  $0x20, R13
4498  	ADDQ  $0x20, R10
4499  	ADDQ  $0x20, R12
4500  	DECQ  R11
4501  	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
4502  
4503  emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
4504  	MOVOU -32(DI)(R12*1), X4
4505  	MOVOU -16(DI)(R12*1), X5
4506  	MOVOA X4, -32(CX)(R12*1)
4507  	MOVOA X5, -16(CX)(R12*1)
4508  	ADDQ  $0x20, R12
4509  	CMPQ  R9, R12
4510  	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
4511  	MOVOU X0, (CX)
4512  	MOVOU X1, 16(CX)
4513  	MOVOU X2, -32(CX)(R9*1)
4514  	MOVOU X3, -16(CX)(R9*1)
4515  	MOVQ  R8, CX
4516  
4517  emit_literal_done_match_emit_encodeBlockAsm10B:
4518  match_nolit_loop_encodeBlockAsm10B:
4519  	MOVL DX, DI
4520  	SUBL SI, DI
4521  	MOVL DI, 16(SP)
4522  	ADDL $0x04, DX
4523  	ADDL $0x04, SI
4524  	MOVQ src_len+32(FP), DI
4525  	SUBL DX, DI
4526  	LEAQ (BX)(DX*1), R8
4527  	LEAQ (BX)(SI*1), SI
4528  
4529  	// matchLen
4530  	XORL R10, R10
4531  
4532  matchlen_loopback_16_match_nolit_encodeBlockAsm10B:
4533  	CMPL DI, $0x10
4534  	JB   matchlen_match8_match_nolit_encodeBlockAsm10B
4535  	MOVQ (R8)(R10*1), R9
4536  	MOVQ 8(R8)(R10*1), R11
4537  	XORQ (SI)(R10*1), R9
4538  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
4539  	XORQ 8(SI)(R10*1), R11
4540  	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm10B
4541  	LEAL -16(DI), DI
4542  	LEAL 16(R10), R10
4543  	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm10B
4544  
4545  matchlen_bsf_16match_nolit_encodeBlockAsm10B:
4546  #ifdef GOAMD64_v3
4547  	TZCNTQ R11, R11
4548  
4549  #else
4550  	BSFQ R11, R11
4551  
4552  #endif
4553  	SARQ $0x03, R11
4554  	LEAL 8(R10)(R11*1), R10
4555  	JMP  match_nolit_end_encodeBlockAsm10B
4556  
4557  matchlen_match8_match_nolit_encodeBlockAsm10B:
4558  	CMPL DI, $0x08
4559  	JB   matchlen_match4_match_nolit_encodeBlockAsm10B
4560  	MOVQ (R8)(R10*1), R9
4561  	XORQ (SI)(R10*1), R9
4562  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
4563  	LEAL -8(DI), DI
4564  	LEAL 8(R10), R10
4565  	JMP  matchlen_match4_match_nolit_encodeBlockAsm10B
4566  
4567  matchlen_bsf_8_match_nolit_encodeBlockAsm10B:
4568  #ifdef GOAMD64_v3
4569  	TZCNTQ R9, R9
4570  
4571  #else
4572  	BSFQ R9, R9
4573  
4574  #endif
4575  	SARQ $0x03, R9
4576  	LEAL (R10)(R9*1), R10
4577  	JMP  match_nolit_end_encodeBlockAsm10B
4578  
4579  matchlen_match4_match_nolit_encodeBlockAsm10B:
4580  	CMPL DI, $0x04
4581  	JB   matchlen_match2_match_nolit_encodeBlockAsm10B
4582  	MOVL (R8)(R10*1), R9
4583  	CMPL (SI)(R10*1), R9
4584  	JNE  matchlen_match2_match_nolit_encodeBlockAsm10B
4585  	LEAL -4(DI), DI
4586  	LEAL 4(R10), R10
4587  
4588  matchlen_match2_match_nolit_encodeBlockAsm10B:
4589  	CMPL DI, $0x01
4590  	JE   matchlen_match1_match_nolit_encodeBlockAsm10B
4591  	JB   match_nolit_end_encodeBlockAsm10B
4592  	MOVW (R8)(R10*1), R9
4593  	CMPW (SI)(R10*1), R9
4594  	JNE  matchlen_match1_match_nolit_encodeBlockAsm10B
4595  	LEAL 2(R10), R10
4596  	SUBL $0x02, DI
4597  	JZ   match_nolit_end_encodeBlockAsm10B
4598  
4599  matchlen_match1_match_nolit_encodeBlockAsm10B:
4600  	MOVB (R8)(R10*1), R9
4601  	CMPB (SI)(R10*1), R9
4602  	JNE  match_nolit_end_encodeBlockAsm10B
4603  	LEAL 1(R10), R10
4604  
4605  match_nolit_end_encodeBlockAsm10B:
4606  	ADDL R10, DX
4607  	MOVL 16(SP), SI
4608  	ADDL $0x04, R10
4609  	MOVL DX, 12(SP)
4610  
4611  	// emitCopy
4612  	CMPL R10, $0x40
4613  	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm10B
4614  	CMPL SI, $0x00000800
4615  	JAE  long_offset_short_match_nolit_encodeBlockAsm10B
4616  	MOVL $0x00000001, DI
4617  	LEAL 16(DI), DI
4618  	MOVB SI, 1(CX)
4619  	SHRL $0x08, SI
4620  	SHLL $0x05, SI
4621  	ORL  SI, DI
4622  	MOVB DI, (CX)
4623  	ADDQ $0x02, CX
4624  	SUBL $0x08, R10
4625  
4626  	// emitRepeat
4627  	LEAL -4(R10), R10
4628  	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
4629  	MOVL R10, DI
4630  	LEAL -4(R10), R10
4631  	CMPL DI, $0x08
4632  	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
4633  	CMPL DI, $0x0c
4634  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
4635  	CMPL SI, $0x00000800
4636  	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
4637  
4638  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
4639  	CMPL R10, $0x00000104
4640  	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
4641  	LEAL -256(R10), R10
4642  	MOVW $0x0019, (CX)
4643  	MOVW R10, 2(CX)
4644  	ADDQ $0x04, CX
4645  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4646  
4647  repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
4648  	LEAL -4(R10), R10
4649  	MOVW $0x0015, (CX)
4650  	MOVB R10, 2(CX)
4651  	ADDQ $0x03, CX
4652  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4653  
4654  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
4655  	SHLL $0x02, R10
4656  	ORL  $0x01, R10
4657  	MOVW R10, (CX)
4658  	ADDQ $0x02, CX
4659  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4660  
4661  repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
4662  	XORQ DI, DI
4663  	LEAL 1(DI)(R10*4), R10
4664  	MOVB SI, 1(CX)
4665  	SARL $0x08, SI
4666  	SHLL $0x05, SI
4667  	ORL  SI, R10
4668  	MOVB R10, (CX)
4669  	ADDQ $0x02, CX
4670  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4671  
4672  long_offset_short_match_nolit_encodeBlockAsm10B:
4673  	MOVB $0xee, (CX)
4674  	MOVW SI, 1(CX)
4675  	LEAL -60(R10), R10
4676  	ADDQ $0x03, CX
4677  
4678  	// emitRepeat
4679  	MOVL R10, DI
4680  	LEAL -4(R10), R10
4681  	CMPL DI, $0x08
4682  	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
4683  	CMPL DI, $0x0c
4684  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
4685  	CMPL SI, $0x00000800
4686  	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
4687  
4688  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
4689  	CMPL R10, $0x00000104
4690  	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
4691  	LEAL -256(R10), R10
4692  	MOVW $0x0019, (CX)
4693  	MOVW R10, 2(CX)
4694  	ADDQ $0x04, CX
4695  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4696  
4697  repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
4698  	LEAL -4(R10), R10
4699  	MOVW $0x0015, (CX)
4700  	MOVB R10, 2(CX)
4701  	ADDQ $0x03, CX
4702  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4703  
4704  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
4705  	SHLL $0x02, R10
4706  	ORL  $0x01, R10
4707  	MOVW R10, (CX)
4708  	ADDQ $0x02, CX
4709  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4710  
4711  repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
4712  	XORQ DI, DI
4713  	LEAL 1(DI)(R10*4), R10
4714  	MOVB SI, 1(CX)
4715  	SARL $0x08, SI
4716  	SHLL $0x05, SI
4717  	ORL  SI, R10
4718  	MOVB R10, (CX)
4719  	ADDQ $0x02, CX
4720  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4721  
4722  two_byte_offset_short_match_nolit_encodeBlockAsm10B:
4723  	MOVL R10, DI
4724  	SHLL $0x02, DI
4725  	CMPL R10, $0x0c
4726  	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
4727  	CMPL SI, $0x00000800
4728  	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
4729  	LEAL -15(DI), DI
4730  	MOVB SI, 1(CX)
4731  	SHRL $0x08, SI
4732  	SHLL $0x05, SI
4733  	ORL  SI, DI
4734  	MOVB DI, (CX)
4735  	ADDQ $0x02, CX
4736  	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
4737  
4738  emit_copy_three_match_nolit_encodeBlockAsm10B:
4739  	LEAL -2(DI), DI
4740  	MOVB DI, (CX)
4741  	MOVW SI, 1(CX)
4742  	ADDQ $0x03, CX
4743  
4744  match_nolit_emitcopy_end_encodeBlockAsm10B:
4745  	CMPL DX, 8(SP)
4746  	JAE  emit_remainder_encodeBlockAsm10B
4747  	MOVQ -2(BX)(DX*1), DI
4748  	CMPQ CX, (SP)
4749  	JB   match_nolit_dst_ok_encodeBlockAsm10B
4750  	MOVQ $0x00000000, ret+56(FP)
4751  	RET
4752  
4753  match_nolit_dst_ok_encodeBlockAsm10B:
4754  	MOVQ  $0x9e3779b1, R9
4755  	MOVQ  DI, R8
4756  	SHRQ  $0x10, DI
4757  	MOVQ  DI, SI
4758  	SHLQ  $0x20, R8
4759  	IMULQ R9, R8
4760  	SHRQ  $0x36, R8
4761  	SHLQ  $0x20, SI
4762  	IMULQ R9, SI
4763  	SHRQ  $0x36, SI
4764  	LEAL  -2(DX), R9
4765  	LEAQ  (AX)(SI*4), R10
4766  	MOVL  (R10), SI
4767  	MOVL  R9, (AX)(R8*4)
4768  	MOVL  DX, (R10)
4769  	CMPL  (BX)(SI*1), DI
4770  	JEQ   match_nolit_loop_encodeBlockAsm10B
4771  	INCL  DX
4772  	JMP   search_loop_encodeBlockAsm10B
4773  
4774  emit_remainder_encodeBlockAsm10B:
4775  	MOVQ src_len+32(FP), AX
4776  	SUBL 12(SP), AX
4777  	LEAQ 3(CX)(AX*1), AX
4778  	CMPQ AX, (SP)
4779  	JB   emit_remainder_ok_encodeBlockAsm10B
4780  	MOVQ $0x00000000, ret+56(FP)
4781  	RET
4782  
4783  emit_remainder_ok_encodeBlockAsm10B:
4784  	MOVQ src_len+32(FP), AX
4785  	MOVL 12(SP), DX
4786  	CMPL DX, AX
4787  	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm10B
4788  	MOVL AX, SI
4789  	MOVL AX, 12(SP)
4790  	LEAQ (BX)(DX*1), AX
4791  	SUBL DX, SI
4792  	LEAL -1(SI), DX
4793  	CMPL DX, $0x3c
4794  	JB   one_byte_emit_remainder_encodeBlockAsm10B
4795  	CMPL DX, $0x00000100
4796  	JB   two_bytes_emit_remainder_encodeBlockAsm10B
4797  	JB   three_bytes_emit_remainder_encodeBlockAsm10B
4798  
4799  three_bytes_emit_remainder_encodeBlockAsm10B:
4800  	MOVB $0xf4, (CX)
4801  	MOVW DX, 1(CX)
4802  	ADDQ $0x03, CX
4803  	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
4804  
4805  two_bytes_emit_remainder_encodeBlockAsm10B:
4806  	MOVB $0xf0, (CX)
4807  	MOVB DL, 1(CX)
4808  	ADDQ $0x02, CX
4809  	CMPL DX, $0x40
4810  	JB   memmove_emit_remainder_encodeBlockAsm10B
4811  	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
4812  
4813  one_byte_emit_remainder_encodeBlockAsm10B:
4814  	SHLB $0x02, DL
4815  	MOVB DL, (CX)
4816  	ADDQ $0x01, CX
4817  
4818  memmove_emit_remainder_encodeBlockAsm10B:
4819  	LEAQ (CX)(SI*1), DX
4820  	MOVL SI, BX
4821  
4822  	// genMemMoveShort
4823  	CMPQ BX, $0x03
4824  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
4825  	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
4826  	CMPQ BX, $0x08
4827  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
4828  	CMPQ BX, $0x10
4829  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
4830  	CMPQ BX, $0x20
4831  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
4832  	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
4833  
4834  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
4835  	MOVB (AX), SI
4836  	MOVB -1(AX)(BX*1), AL
4837  	MOVB SI, (CX)
4838  	MOVB AL, -1(CX)(BX*1)
4839  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
4840  
4841  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
4842  	MOVW (AX), SI
4843  	MOVB 2(AX), AL
4844  	MOVW SI, (CX)
4845  	MOVB AL, 2(CX)
4846  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
4847  
4848  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
4849  	MOVL (AX), SI
4850  	MOVL -4(AX)(BX*1), AX
4851  	MOVL SI, (CX)
4852  	MOVL AX, -4(CX)(BX*1)
4853  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
4854  
4855  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
4856  	MOVQ (AX), SI
4857  	MOVQ -8(AX)(BX*1), AX
4858  	MOVQ SI, (CX)
4859  	MOVQ AX, -8(CX)(BX*1)
4860  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
4861  
4862  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
4863  	MOVOU (AX), X0
4864  	MOVOU -16(AX)(BX*1), X1
4865  	MOVOU X0, (CX)
4866  	MOVOU X1, -16(CX)(BX*1)
4867  	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm10B
4868  
4869  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
4870  	MOVOU (AX), X0
4871  	MOVOU 16(AX), X1
4872  	MOVOU -32(AX)(BX*1), X2
4873  	MOVOU -16(AX)(BX*1), X3
4874  	MOVOU X0, (CX)
4875  	MOVOU X1, 16(CX)
4876  	MOVOU X2, -32(CX)(BX*1)
4877  	MOVOU X3, -16(CX)(BX*1)
4878  
4879  memmove_end_copy_emit_remainder_encodeBlockAsm10B:
4880  	MOVQ DX, CX
4881  	JMP  emit_literal_done_emit_remainder_encodeBlockAsm10B
4882  
4883  memmove_long_emit_remainder_encodeBlockAsm10B:
4884  	LEAQ (CX)(SI*1), DX
4885  	MOVL SI, BX
4886  
4887  	// genMemMoveLong
4888  	MOVOU (AX), X0
4889  	MOVOU 16(AX), X1
4890  	MOVOU -32(AX)(BX*1), X2
4891  	MOVOU -16(AX)(BX*1), X3
4892  	MOVQ  BX, DI
4893  	SHRQ  $0x05, DI
4894  	MOVQ  CX, SI
4895  	ANDL  $0x0000001f, SI
4896  	MOVQ  $0x00000040, R8
4897  	SUBQ  SI, R8
4898  	DECQ  DI
4899  	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
4900  	LEAQ  -32(AX)(R8*1), SI
4901  	LEAQ  -32(CX)(R8*1), R9
4902  
4903  emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
4904  	MOVOU (SI), X4
4905  	MOVOU 16(SI), X5
4906  	MOVOA X4, (R9)
4907  	MOVOA X5, 16(R9)
4908  	ADDQ  $0x20, R9
4909  	ADDQ  $0x20, SI
4910  	ADDQ  $0x20, R8
4911  	DECQ  DI
4912  	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
4913  
4914  emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
4915  	MOVOU -32(AX)(R8*1), X4
4916  	MOVOU -16(AX)(R8*1), X5
4917  	MOVOA X4, -32(CX)(R8*1)
4918  	MOVOA X5, -16(CX)(R8*1)
4919  	ADDQ  $0x20, R8
4920  	CMPQ  BX, R8
4921  	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
4922  	MOVOU X0, (CX)
4923  	MOVOU X1, 16(CX)
4924  	MOVOU X2, -32(CX)(BX*1)
4925  	MOVOU X3, -16(CX)(BX*1)
4926  	MOVQ  DX, CX
4927  
4928  emit_literal_done_emit_remainder_encodeBlockAsm10B:
4929  	MOVQ dst_base+0(FP), AX
4930  	SUBQ AX, CX
4931  	MOVQ CX, ret+56(FP)
4932  	RET
4933  
4934  // func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
4935  // Requires: BMI, SSE2
4936  TEXT ·encodeBlockAsm8B(SB), $24-64
4937  	MOVQ tmp+48(FP), AX
4938  	MOVQ dst_base+0(FP), CX
4939  	MOVQ $0x00000008, DX
4940  	MOVQ AX, BX
4941  	PXOR X0, X0
4942  
4943  zero_loop_encodeBlockAsm8B:
4944  	MOVOU X0, (BX)
4945  	MOVOU X0, 16(BX)
4946  	MOVOU X0, 32(BX)
4947  	MOVOU X0, 48(BX)
4948  	MOVOU X0, 64(BX)
4949  	MOVOU X0, 80(BX)
4950  	MOVOU X0, 96(BX)
4951  	MOVOU X0, 112(BX)
4952  	ADDQ  $0x80, BX
4953  	DECQ  DX
4954  	JNZ   zero_loop_encodeBlockAsm8B
4955  	MOVL  $0x00000000, 12(SP)
4956  	MOVQ  src_len+32(FP), DX
4957  	LEAQ  -9(DX), BX
4958  	LEAQ  -8(DX), SI
4959  	MOVL  SI, 8(SP)
4960  	SHRQ  $0x05, DX
4961  	SUBL  DX, BX
4962  	LEAQ  (CX)(BX*1), BX
4963  	MOVQ  BX, (SP)
4964  	MOVL  $0x00000001, DX
4965  	MOVL  DX, 16(SP)
4966  	MOVQ  src_base+24(FP), BX
4967  
4968  search_loop_encodeBlockAsm8B:
4969  	MOVL  DX, SI
4970  	SUBL  12(SP), SI
4971  	SHRL  $0x04, SI
4972  	LEAL  4(DX)(SI*1), SI
4973  	CMPL  SI, 8(SP)
4974  	JAE   emit_remainder_encodeBlockAsm8B
4975  	MOVQ  (BX)(DX*1), DI
4976  	MOVL  SI, 20(SP)
4977  	MOVQ  $0x9e3779b1, R9
4978  	MOVQ  DI, R10
4979  	MOVQ  DI, R11
4980  	SHRQ  $0x08, R11
4981  	SHLQ  $0x20, R10
4982  	IMULQ R9, R10
4983  	SHRQ  $0x38, R10
4984  	SHLQ  $0x20, R11
4985  	IMULQ R9, R11
4986  	SHRQ  $0x38, R11
4987  	MOVL  (AX)(R10*4), SI
4988  	MOVL  (AX)(R11*4), R8
4989  	MOVL  DX, (AX)(R10*4)
4990  	LEAL  1(DX), R10
4991  	MOVL  R10, (AX)(R11*4)
4992  	MOVQ  DI, R10
4993  	SHRQ  $0x10, R10
4994  	SHLQ  $0x20, R10
4995  	IMULQ R9, R10
4996  	SHRQ  $0x38, R10
4997  	MOVL  DX, R9
4998  	SUBL  16(SP), R9
4999  	MOVL  1(BX)(R9*1), R11
5000  	MOVQ  DI, R9
5001  	SHRQ  $0x08, R9
5002  	CMPL  R9, R11
5003  	JNE   no_repeat_found_encodeBlockAsm8B
5004  	LEAL  1(DX), DI
5005  	MOVL  12(SP), R8
5006  	MOVL  DI, SI
5007  	SUBL  16(SP), SI
5008  	JZ    repeat_extend_back_end_encodeBlockAsm8B
5009  
5010  repeat_extend_back_loop_encodeBlockAsm8B:
5011  	CMPL DI, R8
5012  	JBE  repeat_extend_back_end_encodeBlockAsm8B
5013  	MOVB -1(BX)(SI*1), R9
5014  	MOVB -1(BX)(DI*1), R10
5015  	CMPB R9, R10
5016  	JNE  repeat_extend_back_end_encodeBlockAsm8B
5017  	LEAL -1(DI), DI
5018  	DECL SI
5019  	JNZ  repeat_extend_back_loop_encodeBlockAsm8B
5020  
5021  repeat_extend_back_end_encodeBlockAsm8B:
5022  	MOVL DI, SI
5023  	SUBL 12(SP), SI
5024  	LEAQ 3(CX)(SI*1), SI
5025  	CMPQ SI, (SP)
5026  	JB   repeat_dst_size_check_encodeBlockAsm8B
5027  	MOVQ $0x00000000, ret+56(FP)
5028  	RET
5029  
5030  repeat_dst_size_check_encodeBlockAsm8B:
5031  	MOVL 12(SP), SI
5032  	CMPL SI, DI
5033  	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm8B
5034  	MOVL DI, R9
5035  	MOVL DI, 12(SP)
5036  	LEAQ (BX)(SI*1), R10
5037  	SUBL SI, R9
5038  	LEAL -1(R9), SI
5039  	CMPL SI, $0x3c
5040  	JB   one_byte_repeat_emit_encodeBlockAsm8B
5041  	CMPL SI, $0x00000100
5042  	JB   two_bytes_repeat_emit_encodeBlockAsm8B
5043  	JB   three_bytes_repeat_emit_encodeBlockAsm8B
5044  
5045  three_bytes_repeat_emit_encodeBlockAsm8B:
5046  	MOVB $0xf4, (CX)
5047  	MOVW SI, 1(CX)
5048  	ADDQ $0x03, CX
5049  	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
5050  
5051  two_bytes_repeat_emit_encodeBlockAsm8B:
5052  	MOVB $0xf0, (CX)
5053  	MOVB SI, 1(CX)
5054  	ADDQ $0x02, CX
5055  	CMPL SI, $0x40
5056  	JB   memmove_repeat_emit_encodeBlockAsm8B
5057  	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
5058  
5059  one_byte_repeat_emit_encodeBlockAsm8B:
5060  	SHLB $0x02, SI
5061  	MOVB SI, (CX)
5062  	ADDQ $0x01, CX
5063  
5064  memmove_repeat_emit_encodeBlockAsm8B:
5065  	LEAQ (CX)(R9*1), SI
5066  
5067  	// genMemMoveShort
5068  	CMPQ R9, $0x08
5069  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
5070  	CMPQ R9, $0x10
5071  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
5072  	CMPQ R9, $0x20
5073  	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
5074  	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
5075  
5076  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
5077  	MOVQ (R10), R11
5078  	MOVQ R11, (CX)
5079  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
5080  
5081  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
5082  	MOVQ (R10), R11
5083  	MOVQ -8(R10)(R9*1), R10
5084  	MOVQ R11, (CX)
5085  	MOVQ R10, -8(CX)(R9*1)
5086  	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
5087  
5088  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
5089  	MOVOU (R10), X0
5090  	MOVOU -16(R10)(R9*1), X1
5091  	MOVOU X0, (CX)
5092  	MOVOU X1, -16(CX)(R9*1)
5093  	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm8B
5094  
5095  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
5096  	MOVOU (R10), X0
5097  	MOVOU 16(R10), X1
5098  	MOVOU -32(R10)(R9*1), X2
5099  	MOVOU -16(R10)(R9*1), X3
5100  	MOVOU X0, (CX)
5101  	MOVOU X1, 16(CX)
5102  	MOVOU X2, -32(CX)(R9*1)
5103  	MOVOU X3, -16(CX)(R9*1)
5104  
5105  memmove_end_copy_repeat_emit_encodeBlockAsm8B:
5106  	MOVQ SI, CX
5107  	JMP  emit_literal_done_repeat_emit_encodeBlockAsm8B
5108  
5109  memmove_long_repeat_emit_encodeBlockAsm8B:
5110  	LEAQ (CX)(R9*1), SI
5111  
5112  	// genMemMoveLong
5113  	MOVOU (R10), X0
5114  	MOVOU 16(R10), X1
5115  	MOVOU -32(R10)(R9*1), X2
5116  	MOVOU -16(R10)(R9*1), X3
5117  	MOVQ  R9, R12
5118  	SHRQ  $0x05, R12
5119  	MOVQ  CX, R11
5120  	ANDL  $0x0000001f, R11
5121  	MOVQ  $0x00000040, R13
5122  	SUBQ  R11, R13
5123  	DECQ  R12
5124  	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
5125  	LEAQ  -32(R10)(R13*1), R11
5126  	LEAQ  -32(CX)(R13*1), R14
5127  
5128  emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
5129  	MOVOU (R11), X4
5130  	MOVOU 16(R11), X5
5131  	MOVOA X4, (R14)
5132  	MOVOA X5, 16(R14)
5133  	ADDQ  $0x20, R14
5134  	ADDQ  $0x20, R11
5135  	ADDQ  $0x20, R13
5136  	DECQ  R12
5137  	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
5138  
5139  emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
5140  	MOVOU -32(R10)(R13*1), X4
5141  	MOVOU -16(R10)(R13*1), X5
5142  	MOVOA X4, -32(CX)(R13*1)
5143  	MOVOA X5, -16(CX)(R13*1)
5144  	ADDQ  $0x20, R13
5145  	CMPQ  R9, R13
5146  	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
5147  	MOVOU X0, (CX)
5148  	MOVOU X1, 16(CX)
5149  	MOVOU X2, -32(CX)(R9*1)
5150  	MOVOU X3, -16(CX)(R9*1)
5151  	MOVQ  SI, CX
5152  
5153  emit_literal_done_repeat_emit_encodeBlockAsm8B:
5154  	ADDL $0x05, DX
5155  	MOVL DX, SI
5156  	SUBL 16(SP), SI
5157  	MOVQ src_len+32(FP), R9
5158  	SUBL DX, R9
5159  	LEAQ (BX)(DX*1), R10
5160  	LEAQ (BX)(SI*1), SI
5161  
5162  	// matchLen
5163  	XORL R12, R12
5164  
5165  matchlen_loopback_16_repeat_extend_encodeBlockAsm8B:
5166  	CMPL R9, $0x10
5167  	JB   matchlen_match8_repeat_extend_encodeBlockAsm8B
5168  	MOVQ (R10)(R12*1), R11
5169  	MOVQ 8(R10)(R12*1), R13
5170  	XORQ (SI)(R12*1), R11
5171  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
5172  	XORQ 8(SI)(R12*1), R13
5173  	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm8B
5174  	LEAL -16(R9), R9
5175  	LEAL 16(R12), R12
5176  	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm8B
5177  
5178  matchlen_bsf_16repeat_extend_encodeBlockAsm8B:
5179  #ifdef GOAMD64_v3
5180  	TZCNTQ R13, R13
5181  
5182  #else
5183  	BSFQ R13, R13
5184  
5185  #endif
5186  	SARQ $0x03, R13
5187  	LEAL 8(R12)(R13*1), R12
5188  	JMP  repeat_extend_forward_end_encodeBlockAsm8B
5189  
5190  matchlen_match8_repeat_extend_encodeBlockAsm8B:
5191  	CMPL R9, $0x08
5192  	JB   matchlen_match4_repeat_extend_encodeBlockAsm8B
5193  	MOVQ (R10)(R12*1), R11
5194  	XORQ (SI)(R12*1), R11
5195  	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
5196  	LEAL -8(R9), R9
5197  	LEAL 8(R12), R12
5198  	JMP  matchlen_match4_repeat_extend_encodeBlockAsm8B
5199  
5200  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B:
5201  #ifdef GOAMD64_v3
5202  	TZCNTQ R11, R11
5203  
5204  #else
5205  	BSFQ R11, R11
5206  
5207  #endif
5208  	SARQ $0x03, R11
5209  	LEAL (R12)(R11*1), R12
5210  	JMP  repeat_extend_forward_end_encodeBlockAsm8B
5211  
5212  matchlen_match4_repeat_extend_encodeBlockAsm8B:
5213  	CMPL R9, $0x04
5214  	JB   matchlen_match2_repeat_extend_encodeBlockAsm8B
5215  	MOVL (R10)(R12*1), R11
5216  	CMPL (SI)(R12*1), R11
5217  	JNE  matchlen_match2_repeat_extend_encodeBlockAsm8B
5218  	LEAL -4(R9), R9
5219  	LEAL 4(R12), R12
5220  
5221  matchlen_match2_repeat_extend_encodeBlockAsm8B:
5222  	CMPL R9, $0x01
5223  	JE   matchlen_match1_repeat_extend_encodeBlockAsm8B
5224  	JB   repeat_extend_forward_end_encodeBlockAsm8B
5225  	MOVW (R10)(R12*1), R11
5226  	CMPW (SI)(R12*1), R11
5227  	JNE  matchlen_match1_repeat_extend_encodeBlockAsm8B
5228  	LEAL 2(R12), R12
5229  	SUBL $0x02, R9
5230  	JZ   repeat_extend_forward_end_encodeBlockAsm8B
5231  
5232  matchlen_match1_repeat_extend_encodeBlockAsm8B:
5233  	MOVB (R10)(R12*1), R11
5234  	CMPB (SI)(R12*1), R11
5235  	JNE  repeat_extend_forward_end_encodeBlockAsm8B
5236  	LEAL 1(R12), R12
5237  
5238  repeat_extend_forward_end_encodeBlockAsm8B:
5239  	ADDL  R12, DX
5240  	MOVL  DX, SI
5241  	SUBL  DI, SI
5242  	MOVL  16(SP), DI
5243  	TESTL R8, R8
5244  	JZ    repeat_as_copy_encodeBlockAsm8B
5245  
5246  	// emitRepeat
5247  	MOVL SI, DI
5248  	LEAL -4(SI), SI
5249  	CMPL DI, $0x08
5250  	JBE  repeat_two_match_repeat_encodeBlockAsm8B
5251  	CMPL DI, $0x0c
5252  	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
5253  
5254  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
5255  	CMPL SI, $0x00000104
5256  	JB   repeat_three_match_repeat_encodeBlockAsm8B
5257  	LEAL -256(SI), SI
5258  	MOVW $0x0019, (CX)
5259  	MOVW SI, 2(CX)
5260  	ADDQ $0x04, CX
5261  	JMP  repeat_end_emit_encodeBlockAsm8B
5262  
5263  repeat_three_match_repeat_encodeBlockAsm8B:
5264  	LEAL -4(SI), SI
5265  	MOVW $0x0015, (CX)
5266  	MOVB SI, 2(CX)
5267  	ADDQ $0x03, CX
5268  	JMP  repeat_end_emit_encodeBlockAsm8B
5269  
5270  repeat_two_match_repeat_encodeBlockAsm8B:
5271  	SHLL $0x02, SI
5272  	ORL  $0x01, SI
5273  	MOVW SI, (CX)
5274  	ADDQ $0x02, CX
5275  	JMP  repeat_end_emit_encodeBlockAsm8B
5276  	XORQ R8, R8
5277  	LEAL 1(R8)(SI*4), SI
5278  	MOVB DI, 1(CX)
5279  	SARL $0x08, DI
5280  	SHLL $0x05, DI
5281  	ORL  DI, SI
5282  	MOVB SI, (CX)
5283  	ADDQ $0x02, CX
5284  	JMP  repeat_end_emit_encodeBlockAsm8B
5285  
5286  repeat_as_copy_encodeBlockAsm8B:
5287  	// emitCopy
5288  	CMPL SI, $0x40
5289  	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
5290  	CMPL DI, $0x00000800
5291  	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm8B
5292  	MOVL $0x00000001, R8
5293  	LEAL 16(R8), R8
5294  	MOVB DI, 1(CX)
5295  	SHRL $0x08, DI
5296  	SHLL $0x05, DI
5297  	ORL  DI, R8
5298  	MOVB R8, (CX)
5299  	ADDQ $0x02, CX
5300  	SUBL $0x08, SI
5301  
5302  	// emitRepeat
5303  	LEAL -4(SI), SI
5304  	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
5305  	MOVL SI, DI
5306  	LEAL -4(SI), SI
5307  	CMPL DI, $0x08
5308  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
5309  	CMPL DI, $0x0c
5310  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
5311  
5312  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
5313  	CMPL SI, $0x00000104
5314  	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
5315  	LEAL -256(SI), SI
5316  	MOVW $0x0019, (CX)
5317  	MOVW SI, 2(CX)
5318  	ADDQ $0x04, CX
5319  	JMP  repeat_end_emit_encodeBlockAsm8B
5320  
5321  repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
5322  	LEAL -4(SI), SI
5323  	MOVW $0x0015, (CX)
5324  	MOVB SI, 2(CX)
5325  	ADDQ $0x03, CX
5326  	JMP  repeat_end_emit_encodeBlockAsm8B
5327  
5328  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
5329  	SHLL $0x02, SI
5330  	ORL  $0x01, SI
5331  	MOVW SI, (CX)
5332  	ADDQ $0x02, CX
5333  	JMP  repeat_end_emit_encodeBlockAsm8B
5334  	XORQ R8, R8
5335  	LEAL 1(R8)(SI*4), SI
5336  	MOVB DI, 1(CX)
5337  	SARL $0x08, DI
5338  	SHLL $0x05, DI
5339  	ORL  DI, SI
5340  	MOVB SI, (CX)
5341  	ADDQ $0x02, CX
5342  	JMP  repeat_end_emit_encodeBlockAsm8B
5343  
5344  long_offset_short_repeat_as_copy_encodeBlockAsm8B:
5345  	MOVB $0xee, (CX)
5346  	MOVW DI, 1(CX)
5347  	LEAL -60(SI), SI
5348  	ADDQ $0x03, CX
5349  
5350  	// emitRepeat
5351  	MOVL SI, DI
5352  	LEAL -4(SI), SI
5353  	CMPL DI, $0x08
5354  	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
5355  	CMPL DI, $0x0c
5356  	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
5357  
5358  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
5359  	CMPL SI, $0x00000104
5360  	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
5361  	LEAL -256(SI), SI
5362  	MOVW $0x0019, (CX)
5363  	MOVW SI, 2(CX)
5364  	ADDQ $0x04, CX
5365  	JMP  repeat_end_emit_encodeBlockAsm8B
5366  
5367  repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
5368  	LEAL -4(SI), SI
5369  	MOVW $0x0015, (CX)
5370  	MOVB SI, 2(CX)
5371  	ADDQ $0x03, CX
5372  	JMP  repeat_end_emit_encodeBlockAsm8B
5373  
5374  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
5375  	SHLL $0x02, SI
5376  	ORL  $0x01, SI
5377  	MOVW SI, (CX)
5378  	ADDQ $0x02, CX
5379  	JMP  repeat_end_emit_encodeBlockAsm8B
5380  	XORQ R8, R8
5381  	LEAL 1(R8)(SI*4), SI
5382  	MOVB DI, 1(CX)
5383  	SARL $0x08, DI
5384  	SHLL $0x05, DI
5385  	ORL  DI, SI
5386  	MOVB SI, (CX)
5387  	ADDQ $0x02, CX
5388  	JMP  repeat_end_emit_encodeBlockAsm8B
5389  
5390  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
5391  	MOVL SI, R8
5392  	SHLL $0x02, R8
5393  	CMPL SI, $0x0c
5394  	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B
5395  	LEAL -15(R8), R8
5396  	MOVB DI, 1(CX)
5397  	SHRL $0x08, DI
5398  	SHLL $0x05, DI
5399  	ORL  DI, R8
5400  	MOVB R8, (CX)
5401  	ADDQ $0x02, CX
5402  	JMP  repeat_end_emit_encodeBlockAsm8B
5403  
5404  emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
5405  	LEAL -2(R8), R8
5406  	MOVB R8, (CX)
5407  	MOVW DI, 1(CX)
5408  	ADDQ $0x03, CX
5409  
5410  repeat_end_emit_encodeBlockAsm8B:
5411  	MOVL DX, 12(SP)
5412  	JMP  search_loop_encodeBlockAsm8B
5413  
5414  no_repeat_found_encodeBlockAsm8B:
5415  	CMPL (BX)(SI*1), DI
5416  	JEQ  candidate_match_encodeBlockAsm8B
5417  	SHRQ $0x08, DI
5418  	MOVL (AX)(R10*4), SI
5419  	LEAL 2(DX), R9
5420  	CMPL (BX)(R8*1), DI
5421  	JEQ  candidate2_match_encodeBlockAsm8B
5422  	MOVL R9, (AX)(R10*4)
5423  	SHRQ $0x08, DI
5424  	CMPL (BX)(SI*1), DI
5425  	JEQ  candidate3_match_encodeBlockAsm8B
5426  	MOVL 20(SP), DX
5427  	JMP  search_loop_encodeBlockAsm8B
5428  
5429  candidate3_match_encodeBlockAsm8B:
5430  	ADDL $0x02, DX
5431  	JMP  candidate_match_encodeBlockAsm8B
5432  
5433  candidate2_match_encodeBlockAsm8B:
5434  	MOVL R9, (AX)(R10*4)
5435  	INCL DX
5436  	MOVL R8, SI
5437  
5438  candidate_match_encodeBlockAsm8B:
5439  	MOVL  12(SP), DI
5440  	TESTL SI, SI
5441  	JZ    match_extend_back_end_encodeBlockAsm8B
5442  
5443  match_extend_back_loop_encodeBlockAsm8B:
5444  	CMPL DX, DI
5445  	JBE  match_extend_back_end_encodeBlockAsm8B
5446  	MOVB -1(BX)(SI*1), R8
5447  	MOVB -1(BX)(DX*1), R9
5448  	CMPB R8, R9
5449  	JNE  match_extend_back_end_encodeBlockAsm8B
5450  	LEAL -1(DX), DX
5451  	DECL SI
5452  	JZ   match_extend_back_end_encodeBlockAsm8B
5453  	JMP  match_extend_back_loop_encodeBlockAsm8B
5454  
5455  match_extend_back_end_encodeBlockAsm8B:
5456  	MOVL DX, DI
5457  	SUBL 12(SP), DI
5458  	LEAQ 3(CX)(DI*1), DI
5459  	CMPQ DI, (SP)
5460  	JB   match_dst_size_check_encodeBlockAsm8B
5461  	MOVQ $0x00000000, ret+56(FP)
5462  	RET
5463  
5464  match_dst_size_check_encodeBlockAsm8B:
5465  	MOVL DX, DI
5466  	MOVL 12(SP), R8
5467  	CMPL R8, DI
5468  	JEQ  emit_literal_done_match_emit_encodeBlockAsm8B
5469  	MOVL DI, R9
5470  	MOVL DI, 12(SP)
5471  	LEAQ (BX)(R8*1), DI
5472  	SUBL R8, R9
5473  	LEAL -1(R9), R8
5474  	CMPL R8, $0x3c
5475  	JB   one_byte_match_emit_encodeBlockAsm8B
5476  	CMPL R8, $0x00000100
5477  	JB   two_bytes_match_emit_encodeBlockAsm8B
5478  	JB   three_bytes_match_emit_encodeBlockAsm8B
5479  
5480  three_bytes_match_emit_encodeBlockAsm8B:
5481  	MOVB $0xf4, (CX)
5482  	MOVW R8, 1(CX)
5483  	ADDQ $0x03, CX
5484  	JMP  memmove_long_match_emit_encodeBlockAsm8B
5485  
5486  two_bytes_match_emit_encodeBlockAsm8B:
5487  	MOVB $0xf0, (CX)
5488  	MOVB R8, 1(CX)
5489  	ADDQ $0x02, CX
5490  	CMPL R8, $0x40
5491  	JB   memmove_match_emit_encodeBlockAsm8B
5492  	JMP  memmove_long_match_emit_encodeBlockAsm8B
5493  
5494  one_byte_match_emit_encodeBlockAsm8B:
5495  	SHLB $0x02, R8
5496  	MOVB R8, (CX)
5497  	ADDQ $0x01, CX
5498  
5499  memmove_match_emit_encodeBlockAsm8B:
5500  	LEAQ (CX)(R9*1), R8
5501  
5502  	// genMemMoveShort
5503  	CMPQ R9, $0x08
5504  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
5505  	CMPQ R9, $0x10
5506  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
5507  	CMPQ R9, $0x20
5508  	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
5509  	JMP  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
5510  
5511  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
5512  	MOVQ (DI), R10
5513  	MOVQ R10, (CX)
5514  	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
5515  
5516  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
5517  	MOVQ (DI), R10
5518  	MOVQ -8(DI)(R9*1), DI
5519  	MOVQ R10, (CX)
5520  	MOVQ DI, -8(CX)(R9*1)
5521  	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
5522  
5523  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
5524  	MOVOU (DI), X0
5525  	MOVOU -16(DI)(R9*1), X1
5526  	MOVOU X0, (CX)
5527  	MOVOU X1, -16(CX)(R9*1)
5528  	JMP   memmove_end_copy_match_emit_encodeBlockAsm8B
5529  
5530  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
5531  	MOVOU (DI), X0
5532  	MOVOU 16(DI), X1
5533  	MOVOU -32(DI)(R9*1), X2
5534  	MOVOU -16(DI)(R9*1), X3
5535  	MOVOU X0, (CX)
5536  	MOVOU X1, 16(CX)
5537  	MOVOU X2, -32(CX)(R9*1)
5538  	MOVOU X3, -16(CX)(R9*1)
5539  
5540  memmove_end_copy_match_emit_encodeBlockAsm8B:
5541  	MOVQ R8, CX
5542  	JMP  emit_literal_done_match_emit_encodeBlockAsm8B
5543  
5544  memmove_long_match_emit_encodeBlockAsm8B:
5545  	LEAQ (CX)(R9*1), R8
5546  
5547  	// genMemMoveLong
5548  	MOVOU (DI), X0
5549  	MOVOU 16(DI), X1
5550  	MOVOU -32(DI)(R9*1), X2
5551  	MOVOU -16(DI)(R9*1), X3
5552  	MOVQ  R9, R11
5553  	SHRQ  $0x05, R11
5554  	MOVQ  CX, R10
5555  	ANDL  $0x0000001f, R10
5556  	MOVQ  $0x00000040, R12
5557  	SUBQ  R10, R12
5558  	DECQ  R11
5559  	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
5560  	LEAQ  -32(DI)(R12*1), R10
5561  	LEAQ  -32(CX)(R12*1), R13
5562  
5563  emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
5564  	MOVOU (R10), X4
5565  	MOVOU 16(R10), X5
5566  	MOVOA X4, (R13)
5567  	MOVOA X5, 16(R13)
5568  	ADDQ  $0x20, R13
5569  	ADDQ  $0x20, R10
5570  	ADDQ  $0x20, R12
5571  	DECQ  R11
5572  	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
5573  
5574  emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
5575  	MOVOU -32(DI)(R12*1), X4
5576  	MOVOU -16(DI)(R12*1), X5
5577  	MOVOA X4, -32(CX)(R12*1)
5578  	MOVOA X5, -16(CX)(R12*1)
5579  	ADDQ  $0x20, R12
5580  	CMPQ  R9, R12
5581  	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
5582  	MOVOU X0, (CX)
5583  	MOVOU X1, 16(CX)
5584  	MOVOU X2, -32(CX)(R9*1)
5585  	MOVOU X3, -16(CX)(R9*1)
5586  	MOVQ  R8, CX
5587  
5588  emit_literal_done_match_emit_encodeBlockAsm8B:
5589  match_nolit_loop_encodeBlockAsm8B:
5590  	MOVL DX, DI
5591  	SUBL SI, DI
5592  	MOVL DI, 16(SP)
5593  	ADDL $0x04, DX
5594  	ADDL $0x04, SI
5595  	MOVQ src_len+32(FP), DI
5596  	SUBL DX, DI
5597  	LEAQ (BX)(DX*1), R8
5598  	LEAQ (BX)(SI*1), SI
5599  
5600  	// matchLen
5601  	XORL R10, R10
5602  
5603  matchlen_loopback_16_match_nolit_encodeBlockAsm8B:
5604  	CMPL DI, $0x10
5605  	JB   matchlen_match8_match_nolit_encodeBlockAsm8B
5606  	MOVQ (R8)(R10*1), R9
5607  	MOVQ 8(R8)(R10*1), R11
5608  	XORQ (SI)(R10*1), R9
5609  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
5610  	XORQ 8(SI)(R10*1), R11
5611  	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm8B
5612  	LEAL -16(DI), DI
5613  	LEAL 16(R10), R10
5614  	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm8B
5615  
5616  matchlen_bsf_16match_nolit_encodeBlockAsm8B:
5617  #ifdef GOAMD64_v3
5618  	TZCNTQ R11, R11
5619  
5620  #else
5621  	BSFQ R11, R11
5622  
5623  #endif
5624  	SARQ $0x03, R11
5625  	LEAL 8(R10)(R11*1), R10
5626  	JMP  match_nolit_end_encodeBlockAsm8B
5627  
5628  matchlen_match8_match_nolit_encodeBlockAsm8B:
5629  	CMPL DI, $0x08
5630  	JB   matchlen_match4_match_nolit_encodeBlockAsm8B
5631  	MOVQ (R8)(R10*1), R9
5632  	XORQ (SI)(R10*1), R9
5633  	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
5634  	LEAL -8(DI), DI
5635  	LEAL 8(R10), R10
5636  	JMP  matchlen_match4_match_nolit_encodeBlockAsm8B
5637  
5638  matchlen_bsf_8_match_nolit_encodeBlockAsm8B:
5639  #ifdef GOAMD64_v3
5640  	TZCNTQ R9, R9
5641  
5642  #else
5643  	BSFQ R9, R9
5644  
5645  #endif
5646  	SARQ $0x03, R9
5647  	LEAL (R10)(R9*1), R10
5648  	JMP  match_nolit_end_encodeBlockAsm8B
5649  
5650  matchlen_match4_match_nolit_encodeBlockAsm8B:
5651  	CMPL DI, $0x04
5652  	JB   matchlen_match2_match_nolit_encodeBlockAsm8B
5653  	MOVL (R8)(R10*1), R9
5654  	CMPL (SI)(R10*1), R9
5655  	JNE  matchlen_match2_match_nolit_encodeBlockAsm8B
5656  	LEAL -4(DI), DI
5657  	LEAL 4(R10), R10
5658  
5659  matchlen_match2_match_nolit_encodeBlockAsm8B:
5660  	CMPL DI, $0x01
5661  	JE   matchlen_match1_match_nolit_encodeBlockAsm8B
5662  	JB   match_nolit_end_encodeBlockAsm8B
5663  	MOVW (R8)(R10*1), R9
5664  	CMPW (SI)(R10*1), R9
5665  	JNE  matchlen_match1_match_nolit_encodeBlockAsm8B
5666  	LEAL 2(R10), R10
5667  	SUBL $0x02, DI
5668  	JZ   match_nolit_end_encodeBlockAsm8B
5669  
5670  matchlen_match1_match_nolit_encodeBlockAsm8B:
5671  	MOVB (R8)(R10*1), R9
5672  	CMPB (SI)(R10*1), R9
5673  	JNE  match_nolit_end_encodeBlockAsm8B
5674  	LEAL 1(R10), R10
5675  
5676  match_nolit_end_encodeBlockAsm8B:
5677  	ADDL R10, DX
5678  	MOVL 16(SP), SI
5679  	ADDL $0x04, R10
5680  	MOVL DX, 12(SP)
5681  
5682  	// emitCopy
5683  	CMPL R10, $0x40
5684  	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm8B
5685  	CMPL SI, $0x00000800
5686  	JAE  long_offset_short_match_nolit_encodeBlockAsm8B
5687  	MOVL $0x00000001, DI
5688  	LEAL 16(DI), DI
5689  	MOVB SI, 1(CX)
5690  	SHRL $0x08, SI
5691  	SHLL $0x05, SI
5692  	ORL  SI, DI
5693  	MOVB DI, (CX)
5694  	ADDQ $0x02, CX
5695  	SUBL $0x08, R10
5696  
5697  	// emitRepeat
5698  	LEAL -4(R10), R10
5699  	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
5700  	MOVL R10, SI
5701  	LEAL -4(R10), R10
5702  	CMPL SI, $0x08
5703  	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
5704  	CMPL SI, $0x0c
5705  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
5706  
5707  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
5708  	CMPL R10, $0x00000104
5709  	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
5710  	LEAL -256(R10), R10
5711  	MOVW $0x0019, (CX)
5712  	MOVW R10, 2(CX)
5713  	ADDQ $0x04, CX
5714  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5715  
5716  repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
5717  	LEAL -4(R10), R10
5718  	MOVW $0x0015, (CX)
5719  	MOVB R10, 2(CX)
5720  	ADDQ $0x03, CX
5721  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5722  
5723  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
5724  	SHLL $0x02, R10
5725  	ORL  $0x01, R10
5726  	MOVW R10, (CX)
5727  	ADDQ $0x02, CX
5728  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5729  	XORQ DI, DI
5730  	LEAL 1(DI)(R10*4), R10
5731  	MOVB SI, 1(CX)
5732  	SARL $0x08, SI
5733  	SHLL $0x05, SI
5734  	ORL  SI, R10
5735  	MOVB R10, (CX)
5736  	ADDQ $0x02, CX
5737  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5738  
5739  long_offset_short_match_nolit_encodeBlockAsm8B:
5740  	MOVB $0xee, (CX)
5741  	MOVW SI, 1(CX)
5742  	LEAL -60(R10), R10
5743  	ADDQ $0x03, CX
5744  
5745  	// emitRepeat
5746  	MOVL R10, SI
5747  	LEAL -4(R10), R10
5748  	CMPL SI, $0x08
5749  	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
5750  	CMPL SI, $0x0c
5751  	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
5752  
5753  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
5754  	CMPL R10, $0x00000104
5755  	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
5756  	LEAL -256(R10), R10
5757  	MOVW $0x0019, (CX)
5758  	MOVW R10, 2(CX)
5759  	ADDQ $0x04, CX
5760  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5761  
5762  repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
5763  	LEAL -4(R10), R10
5764  	MOVW $0x0015, (CX)
5765  	MOVB R10, 2(CX)
5766  	ADDQ $0x03, CX
5767  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5768  
5769  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
5770  	SHLL $0x02, R10
5771  	ORL  $0x01, R10
5772  	MOVW R10, (CX)
5773  	ADDQ $0x02, CX
5774  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5775  	XORQ DI, DI
5776  	LEAL 1(DI)(R10*4), R10
5777  	MOVB SI, 1(CX)
5778  	SARL $0x08, SI
5779  	SHLL $0x05, SI
5780  	ORL  SI, R10
5781  	MOVB R10, (CX)
5782  	ADDQ $0x02, CX
5783  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5784  
5785  two_byte_offset_short_match_nolit_encodeBlockAsm8B:
5786  	MOVL R10, DI
5787  	SHLL $0x02, DI
5788  	CMPL R10, $0x0c
5789  	JAE  emit_copy_three_match_nolit_encodeBlockAsm8B
5790  	LEAL -15(DI), DI
5791  	MOVB SI, 1(CX)
5792  	SHRL $0x08, SI
5793  	SHLL $0x05, SI
5794  	ORL  SI, DI
5795  	MOVB DI, (CX)
5796  	ADDQ $0x02, CX
5797  	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
5798  
5799  emit_copy_three_match_nolit_encodeBlockAsm8B:
5800  	LEAL -2(DI), DI
5801  	MOVB DI, (CX)
5802  	MOVW SI, 1(CX)
5803  	ADDQ $0x03, CX
5804  
5805  match_nolit_emitcopy_end_encodeBlockAsm8B:
5806  	CMPL DX, 8(SP)
5807  	JAE  emit_remainder_encodeBlockAsm8B
5808  	MOVQ -2(BX)(DX*1), DI
5809  	CMPQ CX, (SP)
5810  	JB   match_nolit_dst_ok_encodeBlockAsm8B
5811  	MOVQ $0x00000000, ret+56(FP)
5812  	RET
5813  
5814  match_nolit_dst_ok_encodeBlockAsm8B:
5815  	MOVQ  $0x9e3779b1, R9
5816  	MOVQ  DI, R8
5817  	SHRQ  $0x10, DI
5818  	MOVQ  DI, SI
5819  	SHLQ  $0x20, R8
5820  	IMULQ R9, R8
5821  	SHRQ  $0x38, R8
5822  	SHLQ  $0x20, SI
5823  	IMULQ R9, SI
5824  	SHRQ  $0x38, SI
5825  	LEAL  -2(DX), R9
5826  	LEAQ  (AX)(SI*4), R10
5827  	MOVL  (R10), SI
5828  	MOVL  R9, (AX)(R8*4)
5829  	MOVL  DX, (R10)
5830  	CMPL  (BX)(SI*1), DI
5831  	JEQ   match_nolit_loop_encodeBlockAsm8B
5832  	INCL  DX
5833  	JMP   search_loop_encodeBlockAsm8B
5834  
5835  emit_remainder_encodeBlockAsm8B:
5836  	MOVQ src_len+32(FP), AX
5837  	SUBL 12(SP), AX
5838  	LEAQ 3(CX)(AX*1), AX
5839  	CMPQ AX, (SP)
5840  	JB   emit_remainder_ok_encodeBlockAsm8B
5841  	MOVQ $0x00000000, ret+56(FP)
5842  	RET
5843  
5844  emit_remainder_ok_encodeBlockAsm8B:
5845  	MOVQ src_len+32(FP), AX
5846  	MOVL 12(SP), DX
5847  	CMPL DX, AX
5848  	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm8B
5849  	MOVL AX, SI
5850  	MOVL AX, 12(SP)
5851  	LEAQ (BX)(DX*1), AX
5852  	SUBL DX, SI
5853  	LEAL -1(SI), DX
5854  	CMPL DX, $0x3c
5855  	JB   one_byte_emit_remainder_encodeBlockAsm8B
5856  	CMPL DX, $0x00000100
5857  	JB   two_bytes_emit_remainder_encodeBlockAsm8B
5858  	JB   three_bytes_emit_remainder_encodeBlockAsm8B
5859  
5860  three_bytes_emit_remainder_encodeBlockAsm8B:
5861  	MOVB $0xf4, (CX)
5862  	MOVW DX, 1(CX)
5863  	ADDQ $0x03, CX
5864  	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
5865  
5866  two_bytes_emit_remainder_encodeBlockAsm8B:
5867  	MOVB $0xf0, (CX)
5868  	MOVB DL, 1(CX)
5869  	ADDQ $0x02, CX
5870  	CMPL DX, $0x40
5871  	JB   memmove_emit_remainder_encodeBlockAsm8B
5872  	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
5873  
5874  one_byte_emit_remainder_encodeBlockAsm8B:
5875  	SHLB $0x02, DL
5876  	MOVB DL, (CX)
5877  	ADDQ $0x01, CX
5878  
5879  memmove_emit_remainder_encodeBlockAsm8B:
5880  	LEAQ (CX)(SI*1), DX
5881  	MOVL SI, BX
5882  
5883  	// genMemMoveShort
5884  	CMPQ BX, $0x03
5885  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
5886  	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
5887  	CMPQ BX, $0x08
5888  	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
5889  	CMPQ BX, $0x10
5890  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
5891  	CMPQ BX, $0x20
5892  	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
5893  	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
5894  
5895  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
5896  	MOVB (AX), SI
5897  	MOVB -1(AX)(BX*1), AL
5898  	MOVB SI, (CX)
5899  	MOVB AL, -1(CX)(BX*1)
5900  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
5901  
5902  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
5903  	MOVW (AX), SI
5904  	MOVB 2(AX), AL
5905  	MOVW SI, (CX)
5906  	MOVB AL, 2(CX)
5907  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
5908  
5909  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
5910  	MOVL (AX), SI
5911  	MOVL -4(AX)(BX*1), AX
5912  	MOVL SI, (CX)
5913  	MOVL AX, -4(CX)(BX*1)
5914  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
5915  
5916  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
5917  	MOVQ (AX), SI
5918  	MOVQ -8(AX)(BX*1), AX
5919  	MOVQ SI, (CX)
5920  	MOVQ AX, -8(CX)(BX*1)
5921  	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
5922  
5923  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
5924  	MOVOU (AX), X0
5925  	MOVOU -16(AX)(BX*1), X1
5926  	MOVOU X0, (CX)
5927  	MOVOU X1, -16(CX)(BX*1)
5928  	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm8B
5929  
5930  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
5931  	MOVOU (AX), X0
5932  	MOVOU 16(AX), X1
5933  	MOVOU -32(AX)(BX*1), X2
5934  	MOVOU -16(AX)(BX*1), X3
5935  	MOVOU X0, (CX)
5936  	MOVOU X1, 16(CX)
5937  	MOVOU X2, -32(CX)(BX*1)
5938  	MOVOU X3, -16(CX)(BX*1)
5939  
5940  memmove_end_copy_emit_remainder_encodeBlockAsm8B:
5941  	MOVQ DX, CX
5942  	JMP  emit_literal_done_emit_remainder_encodeBlockAsm8B
5943  
5944  memmove_long_emit_remainder_encodeBlockAsm8B:
5945  	LEAQ (CX)(SI*1), DX
5946  	MOVL SI, BX
5947  
5948  	// genMemMoveLong
5949  	MOVOU (AX), X0
5950  	MOVOU 16(AX), X1
5951  	MOVOU -32(AX)(BX*1), X2
5952  	MOVOU -16(AX)(BX*1), X3
5953  	MOVQ  BX, DI
5954  	SHRQ  $0x05, DI
5955  	MOVQ  CX, SI
5956  	ANDL  $0x0000001f, SI
5957  	MOVQ  $0x00000040, R8
5958  	SUBQ  SI, R8
5959  	DECQ  DI
5960  	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
5961  	LEAQ  -32(AX)(R8*1), SI
5962  	LEAQ  -32(CX)(R8*1), R9
5963  
5964  emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
5965  	MOVOU (SI), X4
5966  	MOVOU 16(SI), X5
5967  	MOVOA X4, (R9)
5968  	MOVOA X5, 16(R9)
5969  	ADDQ  $0x20, R9
5970  	ADDQ  $0x20, SI
5971  	ADDQ  $0x20, R8
5972  	DECQ  DI
5973  	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
5974  
5975  emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
5976  	MOVOU -32(AX)(R8*1), X4
5977  	MOVOU -16(AX)(R8*1), X5
5978  	MOVOA X4, -32(CX)(R8*1)
5979  	MOVOA X5, -16(CX)(R8*1)
5980  	ADDQ  $0x20, R8
5981  	CMPQ  BX, R8
5982  	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
5983  	MOVOU X0, (CX)
5984  	MOVOU X1, 16(CX)
5985  	MOVOU X2, -32(CX)(BX*1)
5986  	MOVOU X3, -16(CX)(BX*1)
5987  	MOVQ  DX, CX
5988  
5989  emit_literal_done_emit_remainder_encodeBlockAsm8B:
5990  	MOVQ dst_base+0(FP), AX
5991  	SUBQ AX, CX
5992  	MOVQ CX, ret+56(FP)
5993  	RET
5994  
5995  // func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
5996  // Requires: BMI, SSE2
5997  TEXT ·encodeBetterBlockAsm(SB), $24-64
5998  	MOVQ tmp+48(FP), AX
5999  	MOVQ dst_base+0(FP), CX
6000  	MOVQ $0x00001200, DX
6001  	MOVQ AX, BX
6002  	PXOR X0, X0
6003  
6004  zero_loop_encodeBetterBlockAsm:
6005  	MOVOU X0, (BX)
6006  	MOVOU X0, 16(BX)
6007  	MOVOU X0, 32(BX)
6008  	MOVOU X0, 48(BX)
6009  	MOVOU X0, 64(BX)
6010  	MOVOU X0, 80(BX)
6011  	MOVOU X0, 96(BX)
6012  	MOVOU X0, 112(BX)
6013  	ADDQ  $0x80, BX
6014  	DECQ  DX
6015  	JNZ   zero_loop_encodeBetterBlockAsm
6016  	MOVL  $0x00000000, 12(SP)
6017  	MOVQ  src_len+32(FP), DX
6018  	LEAQ  -6(DX), BX
6019  	LEAQ  -8(DX), SI
6020  	MOVL  SI, 8(SP)
6021  	SHRQ  $0x05, DX
6022  	SUBL  DX, BX
6023  	LEAQ  (CX)(BX*1), BX
6024  	MOVQ  BX, (SP)
6025  	MOVL  $0x00000001, DX
6026  	MOVL  $0x00000000, 16(SP)
6027  	MOVQ  src_base+24(FP), BX
6028  
6029  search_loop_encodeBetterBlockAsm:
6030  	MOVL DX, SI
6031  	SUBL 12(SP), SI
6032  	SHRL $0x07, SI
6033  	CMPL SI, $0x63
6034  	JBE  check_maxskip_ok_encodeBetterBlockAsm
6035  	LEAL 100(DX), SI
6036  	JMP  check_maxskip_cont_encodeBetterBlockAsm
6037  
6038  check_maxskip_ok_encodeBetterBlockAsm:
6039  	LEAL 1(DX)(SI*1), SI
6040  
6041  check_maxskip_cont_encodeBetterBlockAsm:
6042  	CMPL  SI, 8(SP)
6043  	JAE   emit_remainder_encodeBetterBlockAsm
6044  	MOVQ  (BX)(DX*1), DI
6045  	MOVL  SI, 20(SP)
6046  	MOVQ  $0x00cf1bbcdcbfa563, R9
6047  	MOVQ  $0x9e3779b1, SI
6048  	MOVQ  DI, R10
6049  	MOVQ  DI, R11
6050  	SHLQ  $0x08, R10
6051  	IMULQ R9, R10
6052  	SHRQ  $0x2f, R10
6053  	SHLQ  $0x20, R11
6054  	IMULQ SI, R11
6055  	SHRQ  $0x32, R11
6056  	MOVL  (AX)(R10*4), SI
6057  	MOVL  524288(AX)(R11*4), R8
6058  	MOVL  DX, (AX)(R10*4)
6059  	MOVL  DX, 524288(AX)(R11*4)
6060  	MOVQ  (BX)(SI*1), R10
6061  	MOVQ  (BX)(R8*1), R11
6062  	CMPQ  R10, DI
6063  	JEQ   candidate_match_encodeBetterBlockAsm
6064  	CMPQ  R11, DI
6065  	JNE   no_short_found_encodeBetterBlockAsm
6066  	MOVL  R8, SI
6067  	JMP   candidate_match_encodeBetterBlockAsm
6068  
6069  no_short_found_encodeBetterBlockAsm:
6070  	CMPL R10, DI
6071  	JEQ  candidate_match_encodeBetterBlockAsm
6072  	CMPL R11, DI
6073  	JEQ  candidateS_match_encodeBetterBlockAsm
6074  	MOVL 20(SP), DX
6075  	JMP  search_loop_encodeBetterBlockAsm
6076  
6077  candidateS_match_encodeBetterBlockAsm:
6078  	SHRQ  $0x08, DI
6079  	MOVQ  DI, R10
6080  	SHLQ  $0x08, R10
6081  	IMULQ R9, R10
6082  	SHRQ  $0x2f, R10
6083  	MOVL  (AX)(R10*4), SI
6084  	INCL  DX
6085  	MOVL  DX, (AX)(R10*4)
6086  	CMPL  (BX)(SI*1), DI
6087  	JEQ   candidate_match_encodeBetterBlockAsm
6088  	DECL  DX
6089  	MOVL  R8, SI
6090  
6091  candidate_match_encodeBetterBlockAsm:
6092  	MOVL  12(SP), DI
6093  	TESTL SI, SI
6094  	JZ    match_extend_back_end_encodeBetterBlockAsm
6095  
6096  match_extend_back_loop_encodeBetterBlockAsm:
6097  	CMPL DX, DI
6098  	JBE  match_extend_back_end_encodeBetterBlockAsm
6099  	MOVB -1(BX)(SI*1), R8
6100  	MOVB -1(BX)(DX*1), R9
6101  	CMPB R8, R9
6102  	JNE  match_extend_back_end_encodeBetterBlockAsm
6103  	LEAL -1(DX), DX
6104  	DECL SI
6105  	JZ   match_extend_back_end_encodeBetterBlockAsm
6106  	JMP  match_extend_back_loop_encodeBetterBlockAsm
6107  
6108  match_extend_back_end_encodeBetterBlockAsm:
6109  	MOVL DX, DI
6110  	SUBL 12(SP), DI
6111  	LEAQ 5(CX)(DI*1), DI
6112  	CMPQ DI, (SP)
6113  	JB   match_dst_size_check_encodeBetterBlockAsm
6114  	MOVQ $0x00000000, ret+56(FP)
6115  	RET
6116  
6117  match_dst_size_check_encodeBetterBlockAsm:
6118  	MOVL DX, DI
6119  	ADDL $0x04, DX
6120  	ADDL $0x04, SI
6121  	MOVQ src_len+32(FP), R8
6122  	SUBL DX, R8
6123  	LEAQ (BX)(DX*1), R9
6124  	LEAQ (BX)(SI*1), R10
6125  
6126  	// matchLen
6127  	XORL R12, R12
6128  
6129  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
6130  	CMPL R8, $0x10
6131  	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm
6132  	MOVQ (R9)(R12*1), R11
6133  	MOVQ 8(R9)(R12*1), R13
6134  	XORQ (R10)(R12*1), R11
6135  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
6136  	XORQ 8(R10)(R12*1), R13
6137  	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm
6138  	LEAL -16(R8), R8
6139  	LEAL 16(R12), R12
6140  	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
6141  
6142  matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
6143  #ifdef GOAMD64_v3
6144  	TZCNTQ R13, R13
6145  
6146  #else
6147  	BSFQ R13, R13
6148  
6149  #endif
6150  	SARQ $0x03, R13
6151  	LEAL 8(R12)(R13*1), R12
6152  	JMP  match_nolit_end_encodeBetterBlockAsm
6153  
6154  matchlen_match8_match_nolit_encodeBetterBlockAsm:
6155  	CMPL R8, $0x08
6156  	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm
6157  	MOVQ (R9)(R12*1), R11
6158  	XORQ (R10)(R12*1), R11
6159  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
6160  	LEAL -8(R8), R8
6161  	LEAL 8(R12), R12
6162  	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm
6163  
6164  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
6165  #ifdef GOAMD64_v3
6166  	TZCNTQ R11, R11
6167  
6168  #else
6169  	BSFQ R11, R11
6170  
6171  #endif
6172  	SARQ $0x03, R11
6173  	LEAL (R12)(R11*1), R12
6174  	JMP  match_nolit_end_encodeBetterBlockAsm
6175  
6176  matchlen_match4_match_nolit_encodeBetterBlockAsm:
6177  	CMPL R8, $0x04
6178  	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm
6179  	MOVL (R9)(R12*1), R11
6180  	CMPL (R10)(R12*1), R11
6181  	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm
6182  	LEAL -4(R8), R8
6183  	LEAL 4(R12), R12
6184  
6185  matchlen_match2_match_nolit_encodeBetterBlockAsm:
6186  	CMPL R8, $0x01
6187  	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm
6188  	JB   match_nolit_end_encodeBetterBlockAsm
6189  	MOVW (R9)(R12*1), R11
6190  	CMPW (R10)(R12*1), R11
6191  	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm
6192  	LEAL 2(R12), R12
6193  	SUBL $0x02, R8
6194  	JZ   match_nolit_end_encodeBetterBlockAsm
6195  
6196  matchlen_match1_match_nolit_encodeBetterBlockAsm:
6197  	MOVB (R9)(R12*1), R11
6198  	CMPB (R10)(R12*1), R11
6199  	JNE  match_nolit_end_encodeBetterBlockAsm
6200  	LEAL 1(R12), R12
6201  
6202  match_nolit_end_encodeBetterBlockAsm:
6203  	MOVL DX, R8
6204  	SUBL SI, R8
6205  
6206  	// Check if repeat
6207  	CMPL 16(SP), R8
6208  	JEQ  match_is_repeat_encodeBetterBlockAsm
6209  	CMPL R12, $0x01
6210  	JA   match_length_ok_encodeBetterBlockAsm
6211  	CMPL R8, $0x0000ffff
6212  	JBE  match_length_ok_encodeBetterBlockAsm
6213  	MOVL 20(SP), DX
6214  	INCL DX
6215  	JMP  search_loop_encodeBetterBlockAsm
6216  
6217  match_length_ok_encodeBetterBlockAsm:
6218  	MOVL R8, 16(SP)
6219  	MOVL 12(SP), SI
6220  	CMPL SI, DI
6221  	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm
6222  	MOVL DI, R9
6223  	MOVL DI, 12(SP)
6224  	LEAQ (BX)(SI*1), R10
6225  	SUBL SI, R9
6226  	LEAL -1(R9), SI
6227  	CMPL SI, $0x3c
6228  	JB   one_byte_match_emit_encodeBetterBlockAsm
6229  	CMPL SI, $0x00000100
6230  	JB   two_bytes_match_emit_encodeBetterBlockAsm
6231  	CMPL SI, $0x00010000
6232  	JB   three_bytes_match_emit_encodeBetterBlockAsm
6233  	CMPL SI, $0x01000000
6234  	JB   four_bytes_match_emit_encodeBetterBlockAsm
6235  	MOVB $0xfc, (CX)
6236  	MOVL SI, 1(CX)
6237  	ADDQ $0x05, CX
6238  	JMP  memmove_long_match_emit_encodeBetterBlockAsm
6239  
6240  four_bytes_match_emit_encodeBetterBlockAsm:
6241  	MOVL SI, R11
6242  	SHRL $0x10, R11
6243  	MOVB $0xf8, (CX)
6244  	MOVW SI, 1(CX)
6245  	MOVB R11, 3(CX)
6246  	ADDQ $0x04, CX
6247  	JMP  memmove_long_match_emit_encodeBetterBlockAsm
6248  
6249  three_bytes_match_emit_encodeBetterBlockAsm:
6250  	MOVB $0xf4, (CX)
6251  	MOVW SI, 1(CX)
6252  	ADDQ $0x03, CX
6253  	JMP  memmove_long_match_emit_encodeBetterBlockAsm
6254  
6255  two_bytes_match_emit_encodeBetterBlockAsm:
6256  	MOVB $0xf0, (CX)
6257  	MOVB SI, 1(CX)
6258  	ADDQ $0x02, CX
6259  	CMPL SI, $0x40
6260  	JB   memmove_match_emit_encodeBetterBlockAsm
6261  	JMP  memmove_long_match_emit_encodeBetterBlockAsm
6262  
6263  one_byte_match_emit_encodeBetterBlockAsm:
6264  	SHLB $0x02, SI
6265  	MOVB SI, (CX)
6266  	ADDQ $0x01, CX
6267  
6268  memmove_match_emit_encodeBetterBlockAsm:
6269  	LEAQ (CX)(R9*1), SI
6270  
6271  	// genMemMoveShort
6272  	CMPQ R9, $0x04
6273  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
6274  	CMPQ R9, $0x08
6275  	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
6276  	CMPQ R9, $0x10
6277  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
6278  	CMPQ R9, $0x20
6279  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
6280  	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
6281  
6282  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
6283  	MOVL (R10), R11
6284  	MOVL R11, (CX)
6285  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
6286  
6287  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
6288  	MOVL (R10), R11
6289  	MOVL -4(R10)(R9*1), R10
6290  	MOVL R11, (CX)
6291  	MOVL R10, -4(CX)(R9*1)
6292  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
6293  
6294  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
6295  	MOVQ (R10), R11
6296  	MOVQ -8(R10)(R9*1), R10
6297  	MOVQ R11, (CX)
6298  	MOVQ R10, -8(CX)(R9*1)
6299  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
6300  
6301  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
6302  	MOVOU (R10), X0
6303  	MOVOU -16(R10)(R9*1), X1
6304  	MOVOU X0, (CX)
6305  	MOVOU X1, -16(CX)(R9*1)
6306  	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm
6307  
6308  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
6309  	MOVOU (R10), X0
6310  	MOVOU 16(R10), X1
6311  	MOVOU -32(R10)(R9*1), X2
6312  	MOVOU -16(R10)(R9*1), X3
6313  	MOVOU X0, (CX)
6314  	MOVOU X1, 16(CX)
6315  	MOVOU X2, -32(CX)(R9*1)
6316  	MOVOU X3, -16(CX)(R9*1)
6317  
6318  memmove_end_copy_match_emit_encodeBetterBlockAsm:
6319  	MOVQ SI, CX
6320  	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm
6321  
6322  memmove_long_match_emit_encodeBetterBlockAsm:
6323  	LEAQ (CX)(R9*1), SI
6324  
6325  	// genMemMoveLong
6326  	MOVOU (R10), X0
6327  	MOVOU 16(R10), X1
6328  	MOVOU -32(R10)(R9*1), X2
6329  	MOVOU -16(R10)(R9*1), X3
6330  	MOVQ  R9, R13
6331  	SHRQ  $0x05, R13
6332  	MOVQ  CX, R11
6333  	ANDL  $0x0000001f, R11
6334  	MOVQ  $0x00000040, R14
6335  	SUBQ  R11, R14
6336  	DECQ  R13
6337  	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
6338  	LEAQ  -32(R10)(R14*1), R11
6339  	LEAQ  -32(CX)(R14*1), R15
6340  
6341  emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
6342  	MOVOU (R11), X4
6343  	MOVOU 16(R11), X5
6344  	MOVOA X4, (R15)
6345  	MOVOA X5, 16(R15)
6346  	ADDQ  $0x20, R15
6347  	ADDQ  $0x20, R11
6348  	ADDQ  $0x20, R14
6349  	DECQ  R13
6350  	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
6351  
6352  emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
6353  	MOVOU -32(R10)(R14*1), X4
6354  	MOVOU -16(R10)(R14*1), X5
6355  	MOVOA X4, -32(CX)(R14*1)
6356  	MOVOA X5, -16(CX)(R14*1)
6357  	ADDQ  $0x20, R14
6358  	CMPQ  R9, R14
6359  	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
6360  	MOVOU X0, (CX)
6361  	MOVOU X1, 16(CX)
6362  	MOVOU X2, -32(CX)(R9*1)
6363  	MOVOU X3, -16(CX)(R9*1)
6364  	MOVQ  SI, CX
6365  
6366  emit_literal_done_match_emit_encodeBetterBlockAsm:
6367  	ADDL R12, DX
6368  	ADDL $0x04, R12
6369  	MOVL DX, 12(SP)
6370  
6371  	// emitCopy
6372  	CMPL R8, $0x00010000
6373  	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm
6374  	CMPL R12, $0x40
6375  	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm
6376  	MOVB $0xff, (CX)
6377  	MOVL R8, 1(CX)
6378  	LEAL -64(R12), R12
6379  	ADDQ $0x05, CX
6380  	CMPL R12, $0x04
6381  	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm
6382  
6383  	// emitRepeat
6384  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
6385  	MOVL R12, SI
6386  	LEAL -4(R12), R12
6387  	CMPL SI, $0x08
6388  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
6389  	CMPL SI, $0x0c
6390  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
6391  	CMPL R8, $0x00000800
6392  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
6393  
6394  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
6395  	CMPL R12, $0x00000104
6396  	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
6397  	CMPL R12, $0x00010100
6398  	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
6399  	CMPL R12, $0x0100ffff
6400  	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
6401  	LEAL -16842747(R12), R12
6402  	MOVL $0xfffb001d, (CX)
6403  	MOVB $0xff, 4(CX)
6404  	ADDQ $0x05, CX
6405  	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
6406  
6407  repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
6408  	LEAL -65536(R12), R12
6409  	MOVL R12, R8
6410  	MOVW $0x001d, (CX)
6411  	MOVW R12, 2(CX)
6412  	SARL $0x10, R8
6413  	MOVB R8, 4(CX)
6414  	ADDQ $0x05, CX
6415  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6416  
6417  repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
6418  	LEAL -256(R12), R12
6419  	MOVW $0x0019, (CX)
6420  	MOVW R12, 2(CX)
6421  	ADDQ $0x04, CX
6422  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6423  
6424  repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
6425  	LEAL -4(R12), R12
6426  	MOVW $0x0015, (CX)
6427  	MOVB R12, 2(CX)
6428  	ADDQ $0x03, CX
6429  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6430  
6431  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
6432  	SHLL $0x02, R12
6433  	ORL  $0x01, R12
6434  	MOVW R12, (CX)
6435  	ADDQ $0x02, CX
6436  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6437  
6438  repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
6439  	XORQ SI, SI
6440  	LEAL 1(SI)(R12*4), R12
6441  	MOVB R8, 1(CX)
6442  	SARL $0x08, R8
6443  	SHLL $0x05, R8
6444  	ORL  R8, R12
6445  	MOVB R12, (CX)
6446  	ADDQ $0x02, CX
6447  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6448  
6449  four_bytes_remain_match_nolit_encodeBetterBlockAsm:
6450  	TESTL R12, R12
6451  	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm
6452  	XORL  SI, SI
6453  	LEAL  -1(SI)(R12*4), R12
6454  	MOVB  R12, (CX)
6455  	MOVL  R8, 1(CX)
6456  	ADDQ  $0x05, CX
6457  	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm
6458  
6459  two_byte_offset_match_nolit_encodeBetterBlockAsm:
6460  	CMPL R12, $0x40
6461  	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm
6462  	CMPL R8, $0x00000800
6463  	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm
6464  	MOVL $0x00000001, SI
6465  	LEAL 16(SI), SI
6466  	MOVB R8, 1(CX)
6467  	MOVL R8, R9
6468  	SHRL $0x08, R9
6469  	SHLL $0x05, R9
6470  	ORL  R9, SI
6471  	MOVB SI, (CX)
6472  	ADDQ $0x02, CX
6473  	SUBL $0x08, R12
6474  
6475  	// emitRepeat
6476  	LEAL -4(R12), R12
6477  	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6478  
6479  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
6480  	MOVL R12, SI
6481  	LEAL -4(R12), R12
6482  	CMPL SI, $0x08
6483  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6484  	CMPL SI, $0x0c
6485  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6486  	CMPL R8, $0x00000800
6487  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6488  
6489  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
6490  	CMPL R12, $0x00000104
6491  	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6492  	CMPL R12, $0x00010100
6493  	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6494  	CMPL R12, $0x0100ffff
6495  	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6496  	LEAL -16842747(R12), R12
6497  	MOVL $0xfffb001d, (CX)
6498  	MOVB $0xff, 4(CX)
6499  	ADDQ $0x05, CX
6500  	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
6501  
6502  repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
6503  	LEAL -65536(R12), R12
6504  	MOVL R12, R8
6505  	MOVW $0x001d, (CX)
6506  	MOVW R12, 2(CX)
6507  	SARL $0x10, R8
6508  	MOVB R8, 4(CX)
6509  	ADDQ $0x05, CX
6510  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6511  
6512  repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
6513  	LEAL -256(R12), R12
6514  	MOVW $0x0019, (CX)
6515  	MOVW R12, 2(CX)
6516  	ADDQ $0x04, CX
6517  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6518  
6519  repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
6520  	LEAL -4(R12), R12
6521  	MOVW $0x0015, (CX)
6522  	MOVB R12, 2(CX)
6523  	ADDQ $0x03, CX
6524  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6525  
6526  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
6527  	SHLL $0x02, R12
6528  	ORL  $0x01, R12
6529  	MOVW R12, (CX)
6530  	ADDQ $0x02, CX
6531  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6532  
6533  repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
6534  	XORQ SI, SI
6535  	LEAL 1(SI)(R12*4), R12
6536  	MOVB R8, 1(CX)
6537  	SARL $0x08, R8
6538  	SHLL $0x05, R8
6539  	ORL  R8, R12
6540  	MOVB R12, (CX)
6541  	ADDQ $0x02, CX
6542  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6543  
6544  long_offset_short_match_nolit_encodeBetterBlockAsm:
6545  	MOVB $0xee, (CX)
6546  	MOVW R8, 1(CX)
6547  	LEAL -60(R12), R12
6548  	ADDQ $0x03, CX
6549  
6550  	// emitRepeat
6551  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
6552  	MOVL R12, SI
6553  	LEAL -4(R12), R12
6554  	CMPL SI, $0x08
6555  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
6556  	CMPL SI, $0x0c
6557  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
6558  	CMPL R8, $0x00000800
6559  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
6560  
6561  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
6562  	CMPL R12, $0x00000104
6563  	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
6564  	CMPL R12, $0x00010100
6565  	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
6566  	CMPL R12, $0x0100ffff
6567  	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
6568  	LEAL -16842747(R12), R12
6569  	MOVL $0xfffb001d, (CX)
6570  	MOVB $0xff, 4(CX)
6571  	ADDQ $0x05, CX
6572  	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
6573  
6574  repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
6575  	LEAL -65536(R12), R12
6576  	MOVL R12, R8
6577  	MOVW $0x001d, (CX)
6578  	MOVW R12, 2(CX)
6579  	SARL $0x10, R8
6580  	MOVB R8, 4(CX)
6581  	ADDQ $0x05, CX
6582  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6583  
6584  repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
6585  	LEAL -256(R12), R12
6586  	MOVW $0x0019, (CX)
6587  	MOVW R12, 2(CX)
6588  	ADDQ $0x04, CX
6589  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6590  
6591  repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
6592  	LEAL -4(R12), R12
6593  	MOVW $0x0015, (CX)
6594  	MOVB R12, 2(CX)
6595  	ADDQ $0x03, CX
6596  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6597  
6598  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
6599  	SHLL $0x02, R12
6600  	ORL  $0x01, R12
6601  	MOVW R12, (CX)
6602  	ADDQ $0x02, CX
6603  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6604  
6605  repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
6606  	XORQ SI, SI
6607  	LEAL 1(SI)(R12*4), R12
6608  	MOVB R8, 1(CX)
6609  	SARL $0x08, R8
6610  	SHLL $0x05, R8
6611  	ORL  R8, R12
6612  	MOVB R12, (CX)
6613  	ADDQ $0x02, CX
6614  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6615  
6616  two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
6617  	MOVL R12, SI
6618  	SHLL $0x02, SI
6619  	CMPL R12, $0x0c
6620  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
6621  	CMPL R8, $0x00000800
6622  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
6623  	LEAL -15(SI), SI
6624  	MOVB R8, 1(CX)
6625  	SHRL $0x08, R8
6626  	SHLL $0x05, R8
6627  	ORL  R8, SI
6628  	MOVB SI, (CX)
6629  	ADDQ $0x02, CX
6630  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6631  
6632  emit_copy_three_match_nolit_encodeBetterBlockAsm:
6633  	LEAL -2(SI), SI
6634  	MOVB SI, (CX)
6635  	MOVW R8, 1(CX)
6636  	ADDQ $0x03, CX
6637  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6638  
6639  match_is_repeat_encodeBetterBlockAsm:
6640  	MOVL 12(SP), SI
6641  	CMPL SI, DI
6642  	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
6643  	MOVL DI, R9
6644  	MOVL DI, 12(SP)
6645  	LEAQ (BX)(SI*1), R10
6646  	SUBL SI, R9
6647  	LEAL -1(R9), SI
6648  	CMPL SI, $0x3c
6649  	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm
6650  	CMPL SI, $0x00000100
6651  	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm
6652  	CMPL SI, $0x00010000
6653  	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm
6654  	CMPL SI, $0x01000000
6655  	JB   four_bytes_match_emit_repeat_encodeBetterBlockAsm
6656  	MOVB $0xfc, (CX)
6657  	MOVL SI, 1(CX)
6658  	ADDQ $0x05, CX
6659  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
6660  
6661  four_bytes_match_emit_repeat_encodeBetterBlockAsm:
6662  	MOVL SI, R11
6663  	SHRL $0x10, R11
6664  	MOVB $0xf8, (CX)
6665  	MOVW SI, 1(CX)
6666  	MOVB R11, 3(CX)
6667  	ADDQ $0x04, CX
6668  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
6669  
6670  three_bytes_match_emit_repeat_encodeBetterBlockAsm:
6671  	MOVB $0xf4, (CX)
6672  	MOVW SI, 1(CX)
6673  	ADDQ $0x03, CX
6674  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
6675  
6676  two_bytes_match_emit_repeat_encodeBetterBlockAsm:
6677  	MOVB $0xf0, (CX)
6678  	MOVB SI, 1(CX)
6679  	ADDQ $0x02, CX
6680  	CMPL SI, $0x40
6681  	JB   memmove_match_emit_repeat_encodeBetterBlockAsm
6682  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
6683  
6684  one_byte_match_emit_repeat_encodeBetterBlockAsm:
6685  	SHLB $0x02, SI
6686  	MOVB SI, (CX)
6687  	ADDQ $0x01, CX
6688  
6689  memmove_match_emit_repeat_encodeBetterBlockAsm:
6690  	LEAQ (CX)(R9*1), SI
6691  
6692  	// genMemMoveShort
6693  	CMPQ R9, $0x04
6694  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
6695  	CMPQ R9, $0x08
6696  	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
6697  	CMPQ R9, $0x10
6698  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
6699  	CMPQ R9, $0x20
6700  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
6701  	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
6702  
6703  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
6704  	MOVL (R10), R11
6705  	MOVL R11, (CX)
6706  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
6707  
6708  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
6709  	MOVL (R10), R11
6710  	MOVL -4(R10)(R9*1), R10
6711  	MOVL R11, (CX)
6712  	MOVL R10, -4(CX)(R9*1)
6713  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
6714  
6715  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
6716  	MOVQ (R10), R11
6717  	MOVQ -8(R10)(R9*1), R10
6718  	MOVQ R11, (CX)
6719  	MOVQ R10, -8(CX)(R9*1)
6720  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
6721  
6722  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
6723  	MOVOU (R10), X0
6724  	MOVOU -16(R10)(R9*1), X1
6725  	MOVOU X0, (CX)
6726  	MOVOU X1, -16(CX)(R9*1)
6727  	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
6728  
6729  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
6730  	MOVOU (R10), X0
6731  	MOVOU 16(R10), X1
6732  	MOVOU -32(R10)(R9*1), X2
6733  	MOVOU -16(R10)(R9*1), X3
6734  	MOVOU X0, (CX)
6735  	MOVOU X1, 16(CX)
6736  	MOVOU X2, -32(CX)(R9*1)
6737  	MOVOU X3, -16(CX)(R9*1)
6738  
6739  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
6740  	MOVQ SI, CX
6741  	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
6742  
6743  memmove_long_match_emit_repeat_encodeBetterBlockAsm:
6744  	LEAQ (CX)(R9*1), SI
6745  
6746  	// genMemMoveLong
6747  	MOVOU (R10), X0
6748  	MOVOU 16(R10), X1
6749  	MOVOU -32(R10)(R9*1), X2
6750  	MOVOU -16(R10)(R9*1), X3
6751  	MOVQ  R9, R13
6752  	SHRQ  $0x05, R13
6753  	MOVQ  CX, R11
6754  	ANDL  $0x0000001f, R11
6755  	MOVQ  $0x00000040, R14
6756  	SUBQ  R11, R14
6757  	DECQ  R13
6758  	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
6759  	LEAQ  -32(R10)(R14*1), R11
6760  	LEAQ  -32(CX)(R14*1), R15
6761  
6762  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
6763  	MOVOU (R11), X4
6764  	MOVOU 16(R11), X5
6765  	MOVOA X4, (R15)
6766  	MOVOA X5, 16(R15)
6767  	ADDQ  $0x20, R15
6768  	ADDQ  $0x20, R11
6769  	ADDQ  $0x20, R14
6770  	DECQ  R13
6771  	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
6772  
6773  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
6774  	MOVOU -32(R10)(R14*1), X4
6775  	MOVOU -16(R10)(R14*1), X5
6776  	MOVOA X4, -32(CX)(R14*1)
6777  	MOVOA X5, -16(CX)(R14*1)
6778  	ADDQ  $0x20, R14
6779  	CMPQ  R9, R14
6780  	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
6781  	MOVOU X0, (CX)
6782  	MOVOU X1, 16(CX)
6783  	MOVOU X2, -32(CX)(R9*1)
6784  	MOVOU X3, -16(CX)(R9*1)
6785  	MOVQ  SI, CX
6786  
6787  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
6788  	ADDL R12, DX
6789  	ADDL $0x04, R12
6790  	MOVL DX, 12(SP)
6791  
6792  	// emitRepeat
6793  emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
6794  	MOVL R12, SI
6795  	LEAL -4(R12), R12
6796  	CMPL SI, $0x08
6797  	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm
6798  	CMPL SI, $0x0c
6799  	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
6800  	CMPL R8, $0x00000800
6801  	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
6802  
6803  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
6804  	CMPL R12, $0x00000104
6805  	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm
6806  	CMPL R12, $0x00010100
6807  	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm
6808  	CMPL R12, $0x0100ffff
6809  	JB   repeat_five_match_nolit_repeat_encodeBetterBlockAsm
6810  	LEAL -16842747(R12), R12
6811  	MOVL $0xfffb001d, (CX)
6812  	MOVB $0xff, 4(CX)
6813  	ADDQ $0x05, CX
6814  	JMP  emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
6815  
6816  repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
6817  	LEAL -65536(R12), R12
6818  	MOVL R12, R8
6819  	MOVW $0x001d, (CX)
6820  	MOVW R12, 2(CX)
6821  	SARL $0x10, R8
6822  	MOVB R8, 4(CX)
6823  	ADDQ $0x05, CX
6824  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6825  
6826  repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
6827  	LEAL -256(R12), R12
6828  	MOVW $0x0019, (CX)
6829  	MOVW R12, 2(CX)
6830  	ADDQ $0x04, CX
6831  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6832  
6833  repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
6834  	LEAL -4(R12), R12
6835  	MOVW $0x0015, (CX)
6836  	MOVB R12, 2(CX)
6837  	ADDQ $0x03, CX
6838  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6839  
6840  repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
6841  	SHLL $0x02, R12
6842  	ORL  $0x01, R12
6843  	MOVW R12, (CX)
6844  	ADDQ $0x02, CX
6845  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
6846  
6847  repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
6848  	XORQ SI, SI
6849  	LEAL 1(SI)(R12*4), R12
6850  	MOVB R8, 1(CX)
6851  	SARL $0x08, R8
6852  	SHLL $0x05, R8
6853  	ORL  R8, R12
6854  	MOVB R12, (CX)
6855  	ADDQ $0x02, CX
6856  
6857  match_nolit_emitcopy_end_encodeBetterBlockAsm:
6858  	CMPL DX, 8(SP)
6859  	JAE  emit_remainder_encodeBetterBlockAsm
6860  	CMPQ CX, (SP)
6861  	JB   match_nolit_dst_ok_encodeBetterBlockAsm
6862  	MOVQ $0x00000000, ret+56(FP)
6863  	RET
6864  
6865  match_nolit_dst_ok_encodeBetterBlockAsm:
6866  	MOVQ  $0x00cf1bbcdcbfa563, SI
6867  	MOVQ  $0x9e3779b1, R8
6868  	LEAQ  1(DI), DI
6869  	LEAQ  -2(DX), R9
6870  	MOVQ  (BX)(DI*1), R10
6871  	MOVQ  1(BX)(DI*1), R11
6872  	MOVQ  (BX)(R9*1), R12
6873  	MOVQ  1(BX)(R9*1), R13
6874  	SHLQ  $0x08, R10
6875  	IMULQ SI, R10
6876  	SHRQ  $0x2f, R10
6877  	SHLQ  $0x20, R11
6878  	IMULQ R8, R11
6879  	SHRQ  $0x32, R11
6880  	SHLQ  $0x08, R12
6881  	IMULQ SI, R12
6882  	SHRQ  $0x2f, R12
6883  	SHLQ  $0x20, R13
6884  	IMULQ R8, R13
6885  	SHRQ  $0x32, R13
6886  	LEAQ  1(DI), R8
6887  	LEAQ  1(R9), R14
6888  	MOVL  DI, (AX)(R10*4)
6889  	MOVL  R9, (AX)(R12*4)
6890  	MOVL  R8, 524288(AX)(R11*4)
6891  	MOVL  R14, 524288(AX)(R13*4)
6892  	LEAQ  1(R9)(DI*1), R8
6893  	SHRQ  $0x01, R8
6894  	ADDQ  $0x01, DI
6895  	SUBQ  $0x01, R9
6896  
6897  index_loop_encodeBetterBlockAsm:
6898  	CMPQ  R8, R9
6899  	JAE   search_loop_encodeBetterBlockAsm
6900  	MOVQ  (BX)(DI*1), R10
6901  	MOVQ  (BX)(R8*1), R11
6902  	SHLQ  $0x08, R10
6903  	IMULQ SI, R10
6904  	SHRQ  $0x2f, R10
6905  	SHLQ  $0x08, R11
6906  	IMULQ SI, R11
6907  	SHRQ  $0x2f, R11
6908  	MOVL  DI, (AX)(R10*4)
6909  	MOVL  R8, (AX)(R11*4)
6910  	ADDQ  $0x02, DI
6911  	ADDQ  $0x02, R8
6912  	JMP   index_loop_encodeBetterBlockAsm
6913  
6914  emit_remainder_encodeBetterBlockAsm:
6915  	MOVQ src_len+32(FP), AX
6916  	SUBL 12(SP), AX
6917  	LEAQ 5(CX)(AX*1), AX
6918  	CMPQ AX, (SP)
6919  	JB   emit_remainder_ok_encodeBetterBlockAsm
6920  	MOVQ $0x00000000, ret+56(FP)
6921  	RET
6922  
6923  emit_remainder_ok_encodeBetterBlockAsm:
6924  	MOVQ src_len+32(FP), AX
6925  	MOVL 12(SP), DX
6926  	CMPL DX, AX
6927  	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm
6928  	MOVL AX, SI
6929  	MOVL AX, 12(SP)
6930  	LEAQ (BX)(DX*1), AX
6931  	SUBL DX, SI
6932  	LEAL -1(SI), DX
6933  	CMPL DX, $0x3c
6934  	JB   one_byte_emit_remainder_encodeBetterBlockAsm
6935  	CMPL DX, $0x00000100
6936  	JB   two_bytes_emit_remainder_encodeBetterBlockAsm
6937  	CMPL DX, $0x00010000
6938  	JB   three_bytes_emit_remainder_encodeBetterBlockAsm
6939  	CMPL DX, $0x01000000
6940  	JB   four_bytes_emit_remainder_encodeBetterBlockAsm
6941  	MOVB $0xfc, (CX)
6942  	MOVL DX, 1(CX)
6943  	ADDQ $0x05, CX
6944  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
6945  
6946  four_bytes_emit_remainder_encodeBetterBlockAsm:
6947  	MOVL DX, BX
6948  	SHRL $0x10, BX
6949  	MOVB $0xf8, (CX)
6950  	MOVW DX, 1(CX)
6951  	MOVB BL, 3(CX)
6952  	ADDQ $0x04, CX
6953  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
6954  
6955  three_bytes_emit_remainder_encodeBetterBlockAsm:
6956  	MOVB $0xf4, (CX)
6957  	MOVW DX, 1(CX)
6958  	ADDQ $0x03, CX
6959  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
6960  
6961  two_bytes_emit_remainder_encodeBetterBlockAsm:
6962  	MOVB $0xf0, (CX)
6963  	MOVB DL, 1(CX)
6964  	ADDQ $0x02, CX
6965  	CMPL DX, $0x40
6966  	JB   memmove_emit_remainder_encodeBetterBlockAsm
6967  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
6968  
6969  one_byte_emit_remainder_encodeBetterBlockAsm:
6970  	SHLB $0x02, DL
6971  	MOVB DL, (CX)
6972  	ADDQ $0x01, CX
6973  
6974  memmove_emit_remainder_encodeBetterBlockAsm:
6975  	LEAQ (CX)(SI*1), DX
6976  	MOVL SI, BX
6977  
6978  	// genMemMoveShort
6979  	CMPQ BX, $0x03
6980  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2
6981  	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3
6982  	CMPQ BX, $0x08
6983  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
6984  	CMPQ BX, $0x10
6985  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
6986  	CMPQ BX, $0x20
6987  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
6988  	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
6989  
6990  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
6991  	MOVB (AX), SI
6992  	MOVB -1(AX)(BX*1), AL
6993  	MOVB SI, (CX)
6994  	MOVB AL, -1(CX)(BX*1)
6995  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
6996  
6997  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
6998  	MOVW (AX), SI
6999  	MOVB 2(AX), AL
7000  	MOVW SI, (CX)
7001  	MOVB AL, 2(CX)
7002  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
7003  
7004  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
7005  	MOVL (AX), SI
7006  	MOVL -4(AX)(BX*1), AX
7007  	MOVL SI, (CX)
7008  	MOVL AX, -4(CX)(BX*1)
7009  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
7010  
7011  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
7012  	MOVQ (AX), SI
7013  	MOVQ -8(AX)(BX*1), AX
7014  	MOVQ SI, (CX)
7015  	MOVQ AX, -8(CX)(BX*1)
7016  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
7017  
7018  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
7019  	MOVOU (AX), X0
7020  	MOVOU -16(AX)(BX*1), X1
7021  	MOVOU X0, (CX)
7022  	MOVOU X1, -16(CX)(BX*1)
7023  	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm
7024  
7025  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
7026  	MOVOU (AX), X0
7027  	MOVOU 16(AX), X1
7028  	MOVOU -32(AX)(BX*1), X2
7029  	MOVOU -16(AX)(BX*1), X3
7030  	MOVOU X0, (CX)
7031  	MOVOU X1, 16(CX)
7032  	MOVOU X2, -32(CX)(BX*1)
7033  	MOVOU X3, -16(CX)(BX*1)
7034  
7035  memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
7036  	MOVQ DX, CX
7037  	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm
7038  
7039  memmove_long_emit_remainder_encodeBetterBlockAsm:
7040  	LEAQ (CX)(SI*1), DX
7041  	MOVL SI, BX
7042  
7043  	// genMemMoveLong
7044  	MOVOU (AX), X0
7045  	MOVOU 16(AX), X1
7046  	MOVOU -32(AX)(BX*1), X2
7047  	MOVOU -16(AX)(BX*1), X3
7048  	MOVQ  BX, DI
7049  	SHRQ  $0x05, DI
7050  	MOVQ  CX, SI
7051  	ANDL  $0x0000001f, SI
7052  	MOVQ  $0x00000040, R8
7053  	SUBQ  SI, R8
7054  	DECQ  DI
7055  	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
7056  	LEAQ  -32(AX)(R8*1), SI
7057  	LEAQ  -32(CX)(R8*1), R9
7058  
7059  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
7060  	MOVOU (SI), X4
7061  	MOVOU 16(SI), X5
7062  	MOVOA X4, (R9)
7063  	MOVOA X5, 16(R9)
7064  	ADDQ  $0x20, R9
7065  	ADDQ  $0x20, SI
7066  	ADDQ  $0x20, R8
7067  	DECQ  DI
7068  	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
7069  
7070  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
7071  	MOVOU -32(AX)(R8*1), X4
7072  	MOVOU -16(AX)(R8*1), X5
7073  	MOVOA X4, -32(CX)(R8*1)
7074  	MOVOA X5, -16(CX)(R8*1)
7075  	ADDQ  $0x20, R8
7076  	CMPQ  BX, R8
7077  	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
7078  	MOVOU X0, (CX)
7079  	MOVOU X1, 16(CX)
7080  	MOVOU X2, -32(CX)(BX*1)
7081  	MOVOU X3, -16(CX)(BX*1)
7082  	MOVQ  DX, CX
7083  
7084  emit_literal_done_emit_remainder_encodeBetterBlockAsm:
7085  	MOVQ dst_base+0(FP), AX
7086  	SUBQ AX, CX
7087  	MOVQ CX, ret+56(FP)
7088  	RET
7089  
7090  // func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int
7091  // Requires: BMI, SSE2
7092  TEXT ·encodeBetterBlockAsm4MB(SB), $24-64
7093  	MOVQ tmp+48(FP), AX
7094  	MOVQ dst_base+0(FP), CX
7095  	MOVQ $0x00001200, DX
7096  	MOVQ AX, BX
7097  	PXOR X0, X0
7098  
7099  zero_loop_encodeBetterBlockAsm4MB:
7100  	MOVOU X0, (BX)
7101  	MOVOU X0, 16(BX)
7102  	MOVOU X0, 32(BX)
7103  	MOVOU X0, 48(BX)
7104  	MOVOU X0, 64(BX)
7105  	MOVOU X0, 80(BX)
7106  	MOVOU X0, 96(BX)
7107  	MOVOU X0, 112(BX)
7108  	ADDQ  $0x80, BX
7109  	DECQ  DX
7110  	JNZ   zero_loop_encodeBetterBlockAsm4MB
7111  	MOVL  $0x00000000, 12(SP)
7112  	MOVQ  src_len+32(FP), DX
7113  	LEAQ  -6(DX), BX
7114  	LEAQ  -8(DX), SI
7115  	MOVL  SI, 8(SP)
7116  	SHRQ  $0x05, DX
7117  	SUBL  DX, BX
7118  	LEAQ  (CX)(BX*1), BX
7119  	MOVQ  BX, (SP)
7120  	MOVL  $0x00000001, DX
7121  	MOVL  $0x00000000, 16(SP)
7122  	MOVQ  src_base+24(FP), BX
7123  
7124  search_loop_encodeBetterBlockAsm4MB:
7125  	MOVL DX, SI
7126  	SUBL 12(SP), SI
7127  	SHRL $0x07, SI
7128  	CMPL SI, $0x63
7129  	JBE  check_maxskip_ok_encodeBetterBlockAsm4MB
7130  	LEAL 100(DX), SI
7131  	JMP  check_maxskip_cont_encodeBetterBlockAsm4MB
7132  
7133  check_maxskip_ok_encodeBetterBlockAsm4MB:
7134  	LEAL 1(DX)(SI*1), SI
7135  
7136  check_maxskip_cont_encodeBetterBlockAsm4MB:
7137  	CMPL  SI, 8(SP)
7138  	JAE   emit_remainder_encodeBetterBlockAsm4MB
7139  	MOVQ  (BX)(DX*1), DI
7140  	MOVL  SI, 20(SP)
7141  	MOVQ  $0x00cf1bbcdcbfa563, R9
7142  	MOVQ  $0x9e3779b1, SI
7143  	MOVQ  DI, R10
7144  	MOVQ  DI, R11
7145  	SHLQ  $0x08, R10
7146  	IMULQ R9, R10
7147  	SHRQ  $0x2f, R10
7148  	SHLQ  $0x20, R11
7149  	IMULQ SI, R11
7150  	SHRQ  $0x32, R11
7151  	MOVL  (AX)(R10*4), SI
7152  	MOVL  524288(AX)(R11*4), R8
7153  	MOVL  DX, (AX)(R10*4)
7154  	MOVL  DX, 524288(AX)(R11*4)
7155  	MOVQ  (BX)(SI*1), R10
7156  	MOVQ  (BX)(R8*1), R11
7157  	CMPQ  R10, DI
7158  	JEQ   candidate_match_encodeBetterBlockAsm4MB
7159  	CMPQ  R11, DI
7160  	JNE   no_short_found_encodeBetterBlockAsm4MB
7161  	MOVL  R8, SI
7162  	JMP   candidate_match_encodeBetterBlockAsm4MB
7163  
7164  no_short_found_encodeBetterBlockAsm4MB:
7165  	CMPL R10, DI
7166  	JEQ  candidate_match_encodeBetterBlockAsm4MB
7167  	CMPL R11, DI
7168  	JEQ  candidateS_match_encodeBetterBlockAsm4MB
7169  	MOVL 20(SP), DX
7170  	JMP  search_loop_encodeBetterBlockAsm4MB
7171  
7172  candidateS_match_encodeBetterBlockAsm4MB:
7173  	SHRQ  $0x08, DI
7174  	MOVQ  DI, R10
7175  	SHLQ  $0x08, R10
7176  	IMULQ R9, R10
7177  	SHRQ  $0x2f, R10
7178  	MOVL  (AX)(R10*4), SI
7179  	INCL  DX
7180  	MOVL  DX, (AX)(R10*4)
7181  	CMPL  (BX)(SI*1), DI
7182  	JEQ   candidate_match_encodeBetterBlockAsm4MB
7183  	DECL  DX
7184  	MOVL  R8, SI
7185  
7186  candidate_match_encodeBetterBlockAsm4MB:
7187  	MOVL  12(SP), DI
7188  	TESTL SI, SI
7189  	JZ    match_extend_back_end_encodeBetterBlockAsm4MB
7190  
7191  match_extend_back_loop_encodeBetterBlockAsm4MB:
7192  	CMPL DX, DI
7193  	JBE  match_extend_back_end_encodeBetterBlockAsm4MB
7194  	MOVB -1(BX)(SI*1), R8
7195  	MOVB -1(BX)(DX*1), R9
7196  	CMPB R8, R9
7197  	JNE  match_extend_back_end_encodeBetterBlockAsm4MB
7198  	LEAL -1(DX), DX
7199  	DECL SI
7200  	JZ   match_extend_back_end_encodeBetterBlockAsm4MB
7201  	JMP  match_extend_back_loop_encodeBetterBlockAsm4MB
7202  
7203  match_extend_back_end_encodeBetterBlockAsm4MB:
7204  	MOVL DX, DI
7205  	SUBL 12(SP), DI
7206  	LEAQ 4(CX)(DI*1), DI
7207  	CMPQ DI, (SP)
7208  	JB   match_dst_size_check_encodeBetterBlockAsm4MB
7209  	MOVQ $0x00000000, ret+56(FP)
7210  	RET
7211  
7212  match_dst_size_check_encodeBetterBlockAsm4MB:
7213  	MOVL DX, DI
7214  	ADDL $0x04, DX
7215  	ADDL $0x04, SI
7216  	MOVQ src_len+32(FP), R8
7217  	SUBL DX, R8
7218  	LEAQ (BX)(DX*1), R9
7219  	LEAQ (BX)(SI*1), R10
7220  
7221  	// matchLen
7222  	XORL R12, R12
7223  
7224  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB:
7225  	CMPL R8, $0x10
7226  	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm4MB
7227  	MOVQ (R9)(R12*1), R11
7228  	MOVQ 8(R9)(R12*1), R13
7229  	XORQ (R10)(R12*1), R11
7230  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
7231  	XORQ 8(R10)(R12*1), R13
7232  	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB
7233  	LEAL -16(R8), R8
7234  	LEAL 16(R12), R12
7235  	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB
7236  
7237  matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB:
7238  #ifdef GOAMD64_v3
7239  	TZCNTQ R13, R13
7240  
7241  #else
7242  	BSFQ R13, R13
7243  
7244  #endif
7245  	SARQ $0x03, R13
7246  	LEAL 8(R12)(R13*1), R12
7247  	JMP  match_nolit_end_encodeBetterBlockAsm4MB
7248  
7249  matchlen_match8_match_nolit_encodeBetterBlockAsm4MB:
7250  	CMPL R8, $0x08
7251  	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
7252  	MOVQ (R9)(R12*1), R11
7253  	XORQ (R10)(R12*1), R11
7254  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
7255  	LEAL -8(R8), R8
7256  	LEAL 8(R12), R12
7257  	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
7258  
7259  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB:
7260  #ifdef GOAMD64_v3
7261  	TZCNTQ R11, R11
7262  
7263  #else
7264  	BSFQ R11, R11
7265  
7266  #endif
7267  	SARQ $0x03, R11
7268  	LEAL (R12)(R11*1), R12
7269  	JMP  match_nolit_end_encodeBetterBlockAsm4MB
7270  
7271  matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
7272  	CMPL R8, $0x04
7273  	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
7274  	MOVL (R9)(R12*1), R11
7275  	CMPL (R10)(R12*1), R11
7276  	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
7277  	LEAL -4(R8), R8
7278  	LEAL 4(R12), R12
7279  
7280  matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
7281  	CMPL R8, $0x01
7282  	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
7283  	JB   match_nolit_end_encodeBetterBlockAsm4MB
7284  	MOVW (R9)(R12*1), R11
7285  	CMPW (R10)(R12*1), R11
7286  	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
7287  	LEAL 2(R12), R12
7288  	SUBL $0x02, R8
7289  	JZ   match_nolit_end_encodeBetterBlockAsm4MB
7290  
7291  matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
7292  	MOVB (R9)(R12*1), R11
7293  	CMPB (R10)(R12*1), R11
7294  	JNE  match_nolit_end_encodeBetterBlockAsm4MB
7295  	LEAL 1(R12), R12
7296  
7297  match_nolit_end_encodeBetterBlockAsm4MB:
7298  	MOVL DX, R8
7299  	SUBL SI, R8
7300  
7301  	// Check if repeat
7302  	CMPL 16(SP), R8
7303  	JEQ  match_is_repeat_encodeBetterBlockAsm4MB
7304  	CMPL R12, $0x01
7305  	JA   match_length_ok_encodeBetterBlockAsm4MB
7306  	CMPL R8, $0x0000ffff
7307  	JBE  match_length_ok_encodeBetterBlockAsm4MB
7308  	MOVL 20(SP), DX
7309  	INCL DX
7310  	JMP  search_loop_encodeBetterBlockAsm4MB
7311  
7312  match_length_ok_encodeBetterBlockAsm4MB:
7313  	MOVL R8, 16(SP)
7314  	MOVL 12(SP), SI
7315  	CMPL SI, DI
7316  	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
7317  	MOVL DI, R9
7318  	MOVL DI, 12(SP)
7319  	LEAQ (BX)(SI*1), R10
7320  	SUBL SI, R9
7321  	LEAL -1(R9), SI
7322  	CMPL SI, $0x3c
7323  	JB   one_byte_match_emit_encodeBetterBlockAsm4MB
7324  	CMPL SI, $0x00000100
7325  	JB   two_bytes_match_emit_encodeBetterBlockAsm4MB
7326  	CMPL SI, $0x00010000
7327  	JB   three_bytes_match_emit_encodeBetterBlockAsm4MB
7328  	MOVL SI, R11
7329  	SHRL $0x10, R11
7330  	MOVB $0xf8, (CX)
7331  	MOVW SI, 1(CX)
7332  	MOVB R11, 3(CX)
7333  	ADDQ $0x04, CX
7334  	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
7335  
7336  three_bytes_match_emit_encodeBetterBlockAsm4MB:
7337  	MOVB $0xf4, (CX)
7338  	MOVW SI, 1(CX)
7339  	ADDQ $0x03, CX
7340  	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
7341  
7342  two_bytes_match_emit_encodeBetterBlockAsm4MB:
7343  	MOVB $0xf0, (CX)
7344  	MOVB SI, 1(CX)
7345  	ADDQ $0x02, CX
7346  	CMPL SI, $0x40
7347  	JB   memmove_match_emit_encodeBetterBlockAsm4MB
7348  	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
7349  
7350  one_byte_match_emit_encodeBetterBlockAsm4MB:
7351  	SHLB $0x02, SI
7352  	MOVB SI, (CX)
7353  	ADDQ $0x01, CX
7354  
7355  memmove_match_emit_encodeBetterBlockAsm4MB:
7356  	LEAQ (CX)(R9*1), SI
7357  
7358  	// genMemMoveShort
7359  	CMPQ R9, $0x04
7360  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
7361  	CMPQ R9, $0x08
7362  	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
7363  	CMPQ R9, $0x10
7364  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
7365  	CMPQ R9, $0x20
7366  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
7367  	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
7368  
7369  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
7370  	MOVL (R10), R11
7371  	MOVL R11, (CX)
7372  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
7373  
7374  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
7375  	MOVL (R10), R11
7376  	MOVL -4(R10)(R9*1), R10
7377  	MOVL R11, (CX)
7378  	MOVL R10, -4(CX)(R9*1)
7379  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
7380  
7381  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
7382  	MOVQ (R10), R11
7383  	MOVQ -8(R10)(R9*1), R10
7384  	MOVQ R11, (CX)
7385  	MOVQ R10, -8(CX)(R9*1)
7386  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
7387  
7388  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
7389  	MOVOU (R10), X0
7390  	MOVOU -16(R10)(R9*1), X1
7391  	MOVOU X0, (CX)
7392  	MOVOU X1, -16(CX)(R9*1)
7393  	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
7394  
7395  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
7396  	MOVOU (R10), X0
7397  	MOVOU 16(R10), X1
7398  	MOVOU -32(R10)(R9*1), X2
7399  	MOVOU -16(R10)(R9*1), X3
7400  	MOVOU X0, (CX)
7401  	MOVOU X1, 16(CX)
7402  	MOVOU X2, -32(CX)(R9*1)
7403  	MOVOU X3, -16(CX)(R9*1)
7404  
7405  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
7406  	MOVQ SI, CX
7407  	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
7408  
7409  memmove_long_match_emit_encodeBetterBlockAsm4MB:
7410  	LEAQ (CX)(R9*1), SI
7411  
7412  	// genMemMoveLong
7413  	MOVOU (R10), X0
7414  	MOVOU 16(R10), X1
7415  	MOVOU -32(R10)(R9*1), X2
7416  	MOVOU -16(R10)(R9*1), X3
7417  	MOVQ  R9, R13
7418  	SHRQ  $0x05, R13
7419  	MOVQ  CX, R11
7420  	ANDL  $0x0000001f, R11
7421  	MOVQ  $0x00000040, R14
7422  	SUBQ  R11, R14
7423  	DECQ  R13
7424  	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
7425  	LEAQ  -32(R10)(R14*1), R11
7426  	LEAQ  -32(CX)(R14*1), R15
7427  
7428  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
7429  	MOVOU (R11), X4
7430  	MOVOU 16(R11), X5
7431  	MOVOA X4, (R15)
7432  	MOVOA X5, 16(R15)
7433  	ADDQ  $0x20, R15
7434  	ADDQ  $0x20, R11
7435  	ADDQ  $0x20, R14
7436  	DECQ  R13
7437  	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
7438  
7439  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
7440  	MOVOU -32(R10)(R14*1), X4
7441  	MOVOU -16(R10)(R14*1), X5
7442  	MOVOA X4, -32(CX)(R14*1)
7443  	MOVOA X5, -16(CX)(R14*1)
7444  	ADDQ  $0x20, R14
7445  	CMPQ  R9, R14
7446  	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
7447  	MOVOU X0, (CX)
7448  	MOVOU X1, 16(CX)
7449  	MOVOU X2, -32(CX)(R9*1)
7450  	MOVOU X3, -16(CX)(R9*1)
7451  	MOVQ  SI, CX
7452  
7453  emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
7454  	ADDL R12, DX
7455  	ADDL $0x04, R12
7456  	MOVL DX, 12(SP)
7457  
7458  	// emitCopy
7459  	CMPL R8, $0x00010000
7460  	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
7461  	CMPL R12, $0x40
7462  	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
7463  	MOVB $0xff, (CX)
7464  	MOVL R8, 1(CX)
7465  	LEAL -64(R12), R12
7466  	ADDQ $0x05, CX
7467  	CMPL R12, $0x04
7468  	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
7469  
7470  	// emitRepeat
7471  	MOVL R12, SI
7472  	LEAL -4(R12), R12
7473  	CMPL SI, $0x08
7474  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
7475  	CMPL SI, $0x0c
7476  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
7477  	CMPL R8, $0x00000800
7478  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
7479  
7480  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
7481  	CMPL R12, $0x00000104
7482  	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
7483  	CMPL R12, $0x00010100
7484  	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
7485  	LEAL -65536(R12), R12
7486  	MOVL R12, R8
7487  	MOVW $0x001d, (CX)
7488  	MOVW R12, 2(CX)
7489  	SARL $0x10, R8
7490  	MOVB R8, 4(CX)
7491  	ADDQ $0x05, CX
7492  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7493  
7494  repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
7495  	LEAL -256(R12), R12
7496  	MOVW $0x0019, (CX)
7497  	MOVW R12, 2(CX)
7498  	ADDQ $0x04, CX
7499  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7500  
7501  repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
7502  	LEAL -4(R12), R12
7503  	MOVW $0x0015, (CX)
7504  	MOVB R12, 2(CX)
7505  	ADDQ $0x03, CX
7506  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7507  
7508  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
7509  	SHLL $0x02, R12
7510  	ORL  $0x01, R12
7511  	MOVW R12, (CX)
7512  	ADDQ $0x02, CX
7513  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7514  
7515  repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
7516  	XORQ SI, SI
7517  	LEAL 1(SI)(R12*4), R12
7518  	MOVB R8, 1(CX)
7519  	SARL $0x08, R8
7520  	SHLL $0x05, R8
7521  	ORL  R8, R12
7522  	MOVB R12, (CX)
7523  	ADDQ $0x02, CX
7524  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7525  
7526  four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
7527  	TESTL R12, R12
7528  	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7529  	XORL  SI, SI
7530  	LEAL  -1(SI)(R12*4), R12
7531  	MOVB  R12, (CX)
7532  	MOVL  R8, 1(CX)
7533  	ADDQ  $0x05, CX
7534  	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7535  
7536  two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
7537  	CMPL R12, $0x40
7538  	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
7539  	CMPL R8, $0x00000800
7540  	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm4MB
7541  	MOVL $0x00000001, SI
7542  	LEAL 16(SI), SI
7543  	MOVB R8, 1(CX)
7544  	SHRL $0x08, R8
7545  	SHLL $0x05, R8
7546  	ORL  R8, SI
7547  	MOVB SI, (CX)
7548  	ADDQ $0x02, CX
7549  	SUBL $0x08, R12
7550  
7551  	// emitRepeat
7552  	LEAL -4(R12), R12
7553  	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
7554  	MOVL R12, SI
7555  	LEAL -4(R12), R12
7556  	CMPL SI, $0x08
7557  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
7558  	CMPL SI, $0x0c
7559  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
7560  	CMPL R8, $0x00000800
7561  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
7562  
7563  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
7564  	CMPL R12, $0x00000104
7565  	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
7566  	CMPL R12, $0x00010100
7567  	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
7568  	LEAL -65536(R12), R12
7569  	MOVL R12, R8
7570  	MOVW $0x001d, (CX)
7571  	MOVW R12, 2(CX)
7572  	SARL $0x10, R8
7573  	MOVB R8, 4(CX)
7574  	ADDQ $0x05, CX
7575  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7576  
7577  repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
7578  	LEAL -256(R12), R12
7579  	MOVW $0x0019, (CX)
7580  	MOVW R12, 2(CX)
7581  	ADDQ $0x04, CX
7582  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7583  
7584  repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
7585  	LEAL -4(R12), R12
7586  	MOVW $0x0015, (CX)
7587  	MOVB R12, 2(CX)
7588  	ADDQ $0x03, CX
7589  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7590  
7591  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
7592  	SHLL $0x02, R12
7593  	ORL  $0x01, R12
7594  	MOVW R12, (CX)
7595  	ADDQ $0x02, CX
7596  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7597  
7598  repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
7599  	XORQ SI, SI
7600  	LEAL 1(SI)(R12*4), R12
7601  	MOVB R8, 1(CX)
7602  	SARL $0x08, R8
7603  	SHLL $0x05, R8
7604  	ORL  R8, R12
7605  	MOVB R12, (CX)
7606  	ADDQ $0x02, CX
7607  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7608  
7609  long_offset_short_match_nolit_encodeBetterBlockAsm4MB:
7610  	MOVB $0xee, (CX)
7611  	MOVW R8, 1(CX)
7612  	LEAL -60(R12), R12
7613  	ADDQ $0x03, CX
7614  
7615  	// emitRepeat
7616  	MOVL R12, SI
7617  	LEAL -4(R12), R12
7618  	CMPL SI, $0x08
7619  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
7620  	CMPL SI, $0x0c
7621  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
7622  	CMPL R8, $0x00000800
7623  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
7624  
7625  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
7626  	CMPL R12, $0x00000104
7627  	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
7628  	CMPL R12, $0x00010100
7629  	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
7630  	LEAL -65536(R12), R12
7631  	MOVL R12, R8
7632  	MOVW $0x001d, (CX)
7633  	MOVW R12, 2(CX)
7634  	SARL $0x10, R8
7635  	MOVB R8, 4(CX)
7636  	ADDQ $0x05, CX
7637  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7638  
7639  repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
7640  	LEAL -256(R12), R12
7641  	MOVW $0x0019, (CX)
7642  	MOVW R12, 2(CX)
7643  	ADDQ $0x04, CX
7644  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7645  
7646  repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
7647  	LEAL -4(R12), R12
7648  	MOVW $0x0015, (CX)
7649  	MOVB R12, 2(CX)
7650  	ADDQ $0x03, CX
7651  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7652  
7653  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
7654  	SHLL $0x02, R12
7655  	ORL  $0x01, R12
7656  	MOVW R12, (CX)
7657  	ADDQ $0x02, CX
7658  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7659  
7660  repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
7661  	XORQ SI, SI
7662  	LEAL 1(SI)(R12*4), R12
7663  	MOVB R8, 1(CX)
7664  	SARL $0x08, R8
7665  	SHLL $0x05, R8
7666  	ORL  R8, R12
7667  	MOVB R12, (CX)
7668  	ADDQ $0x02, CX
7669  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7670  
7671  two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
7672  	MOVL R12, SI
7673  	SHLL $0x02, SI
7674  	CMPL R12, $0x0c
7675  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
7676  	CMPL R8, $0x00000800
7677  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
7678  	LEAL -15(SI), SI
7679  	MOVB R8, 1(CX)
7680  	SHRL $0x08, R8
7681  	SHLL $0x05, R8
7682  	ORL  R8, SI
7683  	MOVB SI, (CX)
7684  	ADDQ $0x02, CX
7685  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7686  
7687  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
7688  	LEAL -2(SI), SI
7689  	MOVB SI, (CX)
7690  	MOVW R8, 1(CX)
7691  	ADDQ $0x03, CX
7692  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7693  
7694  match_is_repeat_encodeBetterBlockAsm4MB:
7695  	MOVL 12(SP), SI
7696  	CMPL SI, DI
7697  	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
7698  	MOVL DI, R9
7699  	MOVL DI, 12(SP)
7700  	LEAQ (BX)(SI*1), R10
7701  	SUBL SI, R9
7702  	LEAL -1(R9), SI
7703  	CMPL SI, $0x3c
7704  	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
7705  	CMPL SI, $0x00000100
7706  	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
7707  	CMPL SI, $0x00010000
7708  	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
7709  	MOVL SI, R11
7710  	SHRL $0x10, R11
7711  	MOVB $0xf8, (CX)
7712  	MOVW SI, 1(CX)
7713  	MOVB R11, 3(CX)
7714  	ADDQ $0x04, CX
7715  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
7716  
7717  three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
7718  	MOVB $0xf4, (CX)
7719  	MOVW SI, 1(CX)
7720  	ADDQ $0x03, CX
7721  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
7722  
7723  two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
7724  	MOVB $0xf0, (CX)
7725  	MOVB SI, 1(CX)
7726  	ADDQ $0x02, CX
7727  	CMPL SI, $0x40
7728  	JB   memmove_match_emit_repeat_encodeBetterBlockAsm4MB
7729  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
7730  
7731  one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
7732  	SHLB $0x02, SI
7733  	MOVB SI, (CX)
7734  	ADDQ $0x01, CX
7735  
7736  memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
7737  	LEAQ (CX)(R9*1), SI
7738  
7739  	// genMemMoveShort
7740  	CMPQ R9, $0x04
7741  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
7742  	CMPQ R9, $0x08
7743  	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
7744  	CMPQ R9, $0x10
7745  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
7746  	CMPQ R9, $0x20
7747  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
7748  	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
7749  
7750  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
7751  	MOVL (R10), R11
7752  	MOVL R11, (CX)
7753  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
7754  
7755  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
7756  	MOVL (R10), R11
7757  	MOVL -4(R10)(R9*1), R10
7758  	MOVL R11, (CX)
7759  	MOVL R10, -4(CX)(R9*1)
7760  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
7761  
7762  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
7763  	MOVQ (R10), R11
7764  	MOVQ -8(R10)(R9*1), R10
7765  	MOVQ R11, (CX)
7766  	MOVQ R10, -8(CX)(R9*1)
7767  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
7768  
7769  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
7770  	MOVOU (R10), X0
7771  	MOVOU -16(R10)(R9*1), X1
7772  	MOVOU X0, (CX)
7773  	MOVOU X1, -16(CX)(R9*1)
7774  	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
7775  
7776  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
7777  	MOVOU (R10), X0
7778  	MOVOU 16(R10), X1
7779  	MOVOU -32(R10)(R9*1), X2
7780  	MOVOU -16(R10)(R9*1), X3
7781  	MOVOU X0, (CX)
7782  	MOVOU X1, 16(CX)
7783  	MOVOU X2, -32(CX)(R9*1)
7784  	MOVOU X3, -16(CX)(R9*1)
7785  
7786  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
7787  	MOVQ SI, CX
7788  	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
7789  
7790  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
7791  	LEAQ (CX)(R9*1), SI
7792  
7793  	// genMemMoveLong
7794  	MOVOU (R10), X0
7795  	MOVOU 16(R10), X1
7796  	MOVOU -32(R10)(R9*1), X2
7797  	MOVOU -16(R10)(R9*1), X3
7798  	MOVQ  R9, R13
7799  	SHRQ  $0x05, R13
7800  	MOVQ  CX, R11
7801  	ANDL  $0x0000001f, R11
7802  	MOVQ  $0x00000040, R14
7803  	SUBQ  R11, R14
7804  	DECQ  R13
7805  	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
7806  	LEAQ  -32(R10)(R14*1), R11
7807  	LEAQ  -32(CX)(R14*1), R15
7808  
7809  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
7810  	MOVOU (R11), X4
7811  	MOVOU 16(R11), X5
7812  	MOVOA X4, (R15)
7813  	MOVOA X5, 16(R15)
7814  	ADDQ  $0x20, R15
7815  	ADDQ  $0x20, R11
7816  	ADDQ  $0x20, R14
7817  	DECQ  R13
7818  	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
7819  
7820  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
7821  	MOVOU -32(R10)(R14*1), X4
7822  	MOVOU -16(R10)(R14*1), X5
7823  	MOVOA X4, -32(CX)(R14*1)
7824  	MOVOA X5, -16(CX)(R14*1)
7825  	ADDQ  $0x20, R14
7826  	CMPQ  R9, R14
7827  	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
7828  	MOVOU X0, (CX)
7829  	MOVOU X1, 16(CX)
7830  	MOVOU X2, -32(CX)(R9*1)
7831  	MOVOU X3, -16(CX)(R9*1)
7832  	MOVQ  SI, CX
7833  
7834  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
7835  	ADDL R12, DX
7836  	ADDL $0x04, R12
7837  	MOVL DX, 12(SP)
7838  
7839  	// emitRepeat
7840  	MOVL R12, SI
7841  	LEAL -4(R12), R12
7842  	CMPL SI, $0x08
7843  	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
7844  	CMPL SI, $0x0c
7845  	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
7846  	CMPL R8, $0x00000800
7847  	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
7848  
7849  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
7850  	CMPL R12, $0x00000104
7851  	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
7852  	CMPL R12, $0x00010100
7853  	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
7854  	LEAL -65536(R12), R12
7855  	MOVL R12, R8
7856  	MOVW $0x001d, (CX)
7857  	MOVW R12, 2(CX)
7858  	SARL $0x10, R8
7859  	MOVB R8, 4(CX)
7860  	ADDQ $0x05, CX
7861  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7862  
7863  repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
7864  	LEAL -256(R12), R12
7865  	MOVW $0x0019, (CX)
7866  	MOVW R12, 2(CX)
7867  	ADDQ $0x04, CX
7868  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7869  
7870  repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
7871  	LEAL -4(R12), R12
7872  	MOVW $0x0015, (CX)
7873  	MOVB R12, 2(CX)
7874  	ADDQ $0x03, CX
7875  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7876  
7877  repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
7878  	SHLL $0x02, R12
7879  	ORL  $0x01, R12
7880  	MOVW R12, (CX)
7881  	ADDQ $0x02, CX
7882  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
7883  
7884  repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
7885  	XORQ SI, SI
7886  	LEAL 1(SI)(R12*4), R12
7887  	MOVB R8, 1(CX)
7888  	SARL $0x08, R8
7889  	SHLL $0x05, R8
7890  	ORL  R8, R12
7891  	MOVB R12, (CX)
7892  	ADDQ $0x02, CX
7893  
7894  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
7895  	CMPL DX, 8(SP)
7896  	JAE  emit_remainder_encodeBetterBlockAsm4MB
7897  	CMPQ CX, (SP)
7898  	JB   match_nolit_dst_ok_encodeBetterBlockAsm4MB
7899  	MOVQ $0x00000000, ret+56(FP)
7900  	RET
7901  
7902  match_nolit_dst_ok_encodeBetterBlockAsm4MB:
7903  	MOVQ  $0x00cf1bbcdcbfa563, SI
7904  	MOVQ  $0x9e3779b1, R8
7905  	LEAQ  1(DI), DI
7906  	LEAQ  -2(DX), R9
7907  	MOVQ  (BX)(DI*1), R10
7908  	MOVQ  1(BX)(DI*1), R11
7909  	MOVQ  (BX)(R9*1), R12
7910  	MOVQ  1(BX)(R9*1), R13
7911  	SHLQ  $0x08, R10
7912  	IMULQ SI, R10
7913  	SHRQ  $0x2f, R10
7914  	SHLQ  $0x20, R11
7915  	IMULQ R8, R11
7916  	SHRQ  $0x32, R11
7917  	SHLQ  $0x08, R12
7918  	IMULQ SI, R12
7919  	SHRQ  $0x2f, R12
7920  	SHLQ  $0x20, R13
7921  	IMULQ R8, R13
7922  	SHRQ  $0x32, R13
7923  	LEAQ  1(DI), R8
7924  	LEAQ  1(R9), R14
7925  	MOVL  DI, (AX)(R10*4)
7926  	MOVL  R9, (AX)(R12*4)
7927  	MOVL  R8, 524288(AX)(R11*4)
7928  	MOVL  R14, 524288(AX)(R13*4)
7929  	LEAQ  1(R9)(DI*1), R8
7930  	SHRQ  $0x01, R8
7931  	ADDQ  $0x01, DI
7932  	SUBQ  $0x01, R9
7933  
7934  index_loop_encodeBetterBlockAsm4MB:
7935  	CMPQ  R8, R9
7936  	JAE   search_loop_encodeBetterBlockAsm4MB
7937  	MOVQ  (BX)(DI*1), R10
7938  	MOVQ  (BX)(R8*1), R11
7939  	SHLQ  $0x08, R10
7940  	IMULQ SI, R10
7941  	SHRQ  $0x2f, R10
7942  	SHLQ  $0x08, R11
7943  	IMULQ SI, R11
7944  	SHRQ  $0x2f, R11
7945  	MOVL  DI, (AX)(R10*4)
7946  	MOVL  R8, (AX)(R11*4)
7947  	ADDQ  $0x02, DI
7948  	ADDQ  $0x02, R8
7949  	JMP   index_loop_encodeBetterBlockAsm4MB
7950  
7951  emit_remainder_encodeBetterBlockAsm4MB:
7952  	MOVQ src_len+32(FP), AX
7953  	SUBL 12(SP), AX
7954  	LEAQ 4(CX)(AX*1), AX
7955  	CMPQ AX, (SP)
7956  	JB   emit_remainder_ok_encodeBetterBlockAsm4MB
7957  	MOVQ $0x00000000, ret+56(FP)
7958  	RET
7959  
7960  emit_remainder_ok_encodeBetterBlockAsm4MB:
7961  	MOVQ src_len+32(FP), AX
7962  	MOVL 12(SP), DX
7963  	CMPL DX, AX
7964  	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
7965  	MOVL AX, SI
7966  	MOVL AX, 12(SP)
7967  	LEAQ (BX)(DX*1), AX
7968  	SUBL DX, SI
7969  	LEAL -1(SI), DX
7970  	CMPL DX, $0x3c
7971  	JB   one_byte_emit_remainder_encodeBetterBlockAsm4MB
7972  	CMPL DX, $0x00000100
7973  	JB   two_bytes_emit_remainder_encodeBetterBlockAsm4MB
7974  	CMPL DX, $0x00010000
7975  	JB   three_bytes_emit_remainder_encodeBetterBlockAsm4MB
7976  	MOVL DX, BX
7977  	SHRL $0x10, BX
7978  	MOVB $0xf8, (CX)
7979  	MOVW DX, 1(CX)
7980  	MOVB BL, 3(CX)
7981  	ADDQ $0x04, CX
7982  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
7983  
7984  three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
7985  	MOVB $0xf4, (CX)
7986  	MOVW DX, 1(CX)
7987  	ADDQ $0x03, CX
7988  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
7989  
7990  two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
7991  	MOVB $0xf0, (CX)
7992  	MOVB DL, 1(CX)
7993  	ADDQ $0x02, CX
7994  	CMPL DX, $0x40
7995  	JB   memmove_emit_remainder_encodeBetterBlockAsm4MB
7996  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
7997  
7998  one_byte_emit_remainder_encodeBetterBlockAsm4MB:
7999  	SHLB $0x02, DL
8000  	MOVB DL, (CX)
8001  	ADDQ $0x01, CX
8002  
8003  memmove_emit_remainder_encodeBetterBlockAsm4MB:
8004  	LEAQ (CX)(SI*1), DX
8005  	MOVL SI, BX
8006  
8007  	// genMemMoveShort
8008  	CMPQ BX, $0x03
8009  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2
8010  	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3
8011  	CMPQ BX, $0x08
8012  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
8013  	CMPQ BX, $0x10
8014  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
8015  	CMPQ BX, $0x20
8016  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
8017  	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
8018  
8019  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
8020  	MOVB (AX), SI
8021  	MOVB -1(AX)(BX*1), AL
8022  	MOVB SI, (CX)
8023  	MOVB AL, -1(CX)(BX*1)
8024  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
8025  
8026  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
8027  	MOVW (AX), SI
8028  	MOVB 2(AX), AL
8029  	MOVW SI, (CX)
8030  	MOVB AL, 2(CX)
8031  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
8032  
8033  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
8034  	MOVL (AX), SI
8035  	MOVL -4(AX)(BX*1), AX
8036  	MOVL SI, (CX)
8037  	MOVL AX, -4(CX)(BX*1)
8038  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
8039  
8040  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
8041  	MOVQ (AX), SI
8042  	MOVQ -8(AX)(BX*1), AX
8043  	MOVQ SI, (CX)
8044  	MOVQ AX, -8(CX)(BX*1)
8045  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
8046  
8047  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
8048  	MOVOU (AX), X0
8049  	MOVOU -16(AX)(BX*1), X1
8050  	MOVOU X0, (CX)
8051  	MOVOU X1, -16(CX)(BX*1)
8052  	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
8053  
8054  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
8055  	MOVOU (AX), X0
8056  	MOVOU 16(AX), X1
8057  	MOVOU -32(AX)(BX*1), X2
8058  	MOVOU -16(AX)(BX*1), X3
8059  	MOVOU X0, (CX)
8060  	MOVOU X1, 16(CX)
8061  	MOVOU X2, -32(CX)(BX*1)
8062  	MOVOU X3, -16(CX)(BX*1)
8063  
8064  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
8065  	MOVQ DX, CX
8066  	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
8067  
8068  memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
8069  	LEAQ (CX)(SI*1), DX
8070  	MOVL SI, BX
8071  
8072  	// genMemMoveLong
8073  	MOVOU (AX), X0
8074  	MOVOU 16(AX), X1
8075  	MOVOU -32(AX)(BX*1), X2
8076  	MOVOU -16(AX)(BX*1), X3
8077  	MOVQ  BX, DI
8078  	SHRQ  $0x05, DI
8079  	MOVQ  CX, SI
8080  	ANDL  $0x0000001f, SI
8081  	MOVQ  $0x00000040, R8
8082  	SUBQ  SI, R8
8083  	DECQ  DI
8084  	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
8085  	LEAQ  -32(AX)(R8*1), SI
8086  	LEAQ  -32(CX)(R8*1), R9
8087  
8088  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
8089  	MOVOU (SI), X4
8090  	MOVOU 16(SI), X5
8091  	MOVOA X4, (R9)
8092  	MOVOA X5, 16(R9)
8093  	ADDQ  $0x20, R9
8094  	ADDQ  $0x20, SI
8095  	ADDQ  $0x20, R8
8096  	DECQ  DI
8097  	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
8098  
8099  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
8100  	MOVOU -32(AX)(R8*1), X4
8101  	MOVOU -16(AX)(R8*1), X5
8102  	MOVOA X4, -32(CX)(R8*1)
8103  	MOVOA X5, -16(CX)(R8*1)
8104  	ADDQ  $0x20, R8
8105  	CMPQ  BX, R8
8106  	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
8107  	MOVOU X0, (CX)
8108  	MOVOU X1, 16(CX)
8109  	MOVOU X2, -32(CX)(BX*1)
8110  	MOVOU X3, -16(CX)(BX*1)
8111  	MOVQ  DX, CX
8112  
8113  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
8114  	MOVQ dst_base+0(FP), AX
8115  	SUBQ AX, CX
8116  	MOVQ CX, ret+56(FP)
8117  	RET
8118  
8119  // func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
8120  // Requires: BMI, SSE2
      //
      // Compresses src into dst using two hash tables kept in tmp and returns the
      // number of bytes written (final dst pointer minus dst_base). Returns 0 when
      // one of the dst-limit checks below fails.
      //
      // tmp layout (81920 bytes, zeroed on entry):
      //   [0, 65536)      long table:  uint32 src positions, indexed by a 14-bit hash
      //   [65536, 81920)  short table: uint32 src positions, indexed by a 12-bit hash
      //
      // Stack slots:
      //    0(SP)  dst limit: dst_base + (src_len - 6) - (src_len >> 5)
      //    8(SP)  search limit: src_len - 8
      //   12(SP)  nextEmit: src position of the first byte not yet written to dst
      //   16(SP)  offset of the previous match (0 until the first match)
      //   20(SP)  src position to probe next if the current probe finds no match
      //
      // Registers live across the main loop:
      //   AX = tmp, BX = src base, CX = dst write pointer, DX = src position s
8121  TEXT ·encodeBetterBlockAsm12B(SB), $24-64
8122  	MOVQ tmp+48(FP), AX
8123  	MOVQ dst_base+0(FP), CX
      	// 0x280 (640) iterations of 128 bytes each = 81920 bytes: clear all of tmp.
8124  	MOVQ $0x00000280, DX
8125  	MOVQ AX, BX
8126  	PXOR X0, X0
8127  
8128  zero_loop_encodeBetterBlockAsm12B:
8129  	MOVOU X0, (BX)
8130  	MOVOU X0, 16(BX)
8131  	MOVOU X0, 32(BX)
8132  	MOVOU X0, 48(BX)
8133  	MOVOU X0, 64(BX)
8134  	MOVOU X0, 80(BX)
8135  	MOVOU X0, 96(BX)
8136  	MOVOU X0, 112(BX)
8137  	ADDQ  $0x80, BX
8138  	DECQ  DX
8139  	JNZ   zero_loop_encodeBetterBlockAsm12B
      	// nextEmit = 0.
8140  	MOVL  $0x00000000, 12(SP)
8141  	MOVQ  src_len+32(FP), DX
8142  	LEAQ  -6(DX), BX
8143  	LEAQ  -8(DX), SI
      	// Search limit = src_len - 8.
8144  	MOVL  SI, 8(SP)
      	// dst limit = dst_base + (src_len - 6) - (src_len >> 5). The subtraction is
      	// done on the 32-bit registers (SUBL).
8145  	SHRQ  $0x05, DX
8146  	SUBL  DX, BX
8147  	LEAQ  (CX)(BX*1), BX
8148  	MOVQ  BX, (SP)
      	// Start probing at src position 1; previous match offset = 0; BX = src base.
8149  	MOVL  $0x00000001, DX
8150  	MOVL  $0x00000000, 16(SP)
8151  	MOVQ  src_base+24(FP), BX
8152  
      // Main loop. DX = s, the src position being probed.
8153  search_loop_encodeBetterBlockAsm12B:
      	// SI = s + 1 + ((s - nextEmit) >> 6): the next position to probe. The step
      	// grows with the distance since the last emitted byte. Leave the loop once
      	// that position reaches the search limit.
8154  	MOVL  DX, SI
8155  	SUBL  12(SP), SI
8156  	SHRL  $0x06, SI
8157  	LEAL  1(DX)(SI*1), SI
8158  	CMPL  SI, 8(SP)
8159  	JAE   emit_remainder_encodeBetterBlockAsm12B
      	// DI = 8 bytes loaded from src[s]; save the next probe position in 20(SP).
8160  	MOVQ  (BX)(DX*1), DI
8161  	MOVL  SI, 20(SP)
      	// R10 = long hash  = ((DI << 16) * 0xcf1bbcdcbf9b) >> 50  (14 bits; the shift
      	//       keeps only the low 48 bits of DI).
      	// R11 = short hash = ((DI << 32) * 0x9e3779b1) >> 52      (12 bits; the shift
      	//       keeps only the low 32 bits of DI).
8162  	MOVQ  $0x0000cf1bbcdcbf9b, R9
8163  	MOVQ  $0x9e3779b1, SI
8164  	MOVQ  DI, R10
8165  	MOVQ  DI, R11
8166  	SHLQ  $0x10, R10
8167  	IMULQ R9, R10
8168  	SHRQ  $0x32, R10
8169  	SHLQ  $0x20, R11
8170  	IMULQ SI, R11
8171  	SHRQ  $0x34, R11
      	// Read both candidates (SI from the long table, R8 from the short table),
      	// then record s in both tables.
8172  	MOVL  (AX)(R10*4), SI
8173  	MOVL  65536(AX)(R11*4), R8
8174  	MOVL  DX, (AX)(R10*4)
8175  	MOVL  DX, 65536(AX)(R11*4)
      	// Compare all 8 bytes at each candidate with DI. A full hit on the long
      	// candidate wins; a full hit on the short candidate replaces SI with R8.
8176  	MOVQ  (BX)(SI*1), R10
8177  	MOVQ  (BX)(R8*1), R11
8178  	CMPQ  R10, DI
8179  	JEQ   candidate_match_encodeBetterBlockAsm12B
8180  	CMPQ  R11, DI
8181  	JNE   no_short_found_encodeBetterBlockAsm12B
8182  	MOVL  R8, SI
8183  	JMP   candidate_match_encodeBetterBlockAsm12B
8184  
      // No 8-byte hit: compare only the low 32 bits (CMPL). A long-candidate hit is
      // used directly; a short-candidate hit goes through candidateS_match. With
      // no hit at all, continue at the position saved in 20(SP).
8185  no_short_found_encodeBetterBlockAsm12B:
8186  	CMPL R10, DI
8187  	JEQ  candidate_match_encodeBetterBlockAsm12B
8188  	CMPL R11, DI
8189  	JEQ  candidateS_match_encodeBetterBlockAsm12B
8190  	MOVL 20(SP), DX
8191  	JMP  search_loop_encodeBetterBlockAsm12B
8192  
      // Only the short candidate (R8) matched 4 bytes. Before using it, probe the
      // long table for position s+1: shift the loaded bytes down by one byte, hash,
      // read the candidate into SI and record s+1. If its first 4 bytes equal the
      // shifted value, continue with DX = s+1 and that candidate; otherwise restore
      // DX = s and fall back to R8.
8193  candidateS_match_encodeBetterBlockAsm12B:
8194  	SHRQ  $0x08, DI
8195  	MOVQ  DI, R10
8196  	SHLQ  $0x10, R10
8197  	IMULQ R9, R10
8198  	SHRQ  $0x32, R10
8199  	MOVL  (AX)(R10*4), SI
8200  	INCL  DX
8201  	MOVL  DX, (AX)(R10*4)
8202  	CMPL  (BX)(SI*1), DI
8203  	JEQ   candidate_match_encodeBetterBlockAsm12B
8204  	DECL  DX
8205  	MOVL  R8, SI
8206  
      // A candidate was chosen: DX = s, SI = candidate position, DI = nextEmit.
      // Skip backward extension when the candidate is at position 0.
8207  candidate_match_encodeBetterBlockAsm12B:
8208  	MOVL  12(SP), DI
8209  	TESTL SI, SI
8210  	JZ    match_extend_back_end_encodeBetterBlockAsm12B
8211  
      // Extend the match backwards one byte at a time while s > nextEmit, the bytes
      // before both positions are equal, and the candidate has not reached 0.
8212  match_extend_back_loop_encodeBetterBlockAsm12B:
8213  	CMPL DX, DI
8214  	JBE  match_extend_back_end_encodeBetterBlockAsm12B
8215  	MOVB -1(BX)(SI*1), R8
8216  	MOVB -1(BX)(DX*1), R9
8217  	CMPB R8, R9
8218  	JNE  match_extend_back_end_encodeBetterBlockAsm12B
8219  	LEAL -1(DX), DX
8220  	DECL SI
8221  	JZ   match_extend_back_end_encodeBetterBlockAsm12B
8222  	JMP  match_extend_back_loop_encodeBetterBlockAsm12B
8223  
      // Return 0 unless CX + 3 + (s - nextEmit) is below the dst limit in (SP).
8224  match_extend_back_end_encodeBetterBlockAsm12B:
8225  	MOVL DX, DI
8226  	SUBL 12(SP), DI
8227  	LEAQ 3(CX)(DI*1), DI
8228  	CMPQ DI, (SP)
8229  	JB   match_dst_size_check_encodeBetterBlockAsm12B
8230  	MOVQ $0x00000000, ret+56(FP)
8231  	RET
8232  
      // DI = match start. Advance s and the candidate by 4, then set up the forward
      // comparison: R8 = src_len - s (bytes left), R9 = &src[s], R10 = &src[candidate].
8233  match_dst_size_check_encodeBetterBlockAsm12B:
8234  	MOVL DX, DI
8235  	ADDL $0x04, DX
8236  	ADDL $0x04, SI
8237  	MOVQ src_len+32(FP), R8
8238  	SUBL DX, R8
8239  	LEAQ (BX)(DX*1), R9
8240  	LEAQ (BX)(SI*1), R10
8241  
8242  	// matchLen
      	// R12 = number of equal bytes found so far at R9/R10; R8 counts down the
      	// bytes still available.
8243  	XORL R12, R12
8244  
      // Compare 16 bytes per iteration while at least 16 bytes remain.
8245  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B:
8246  	CMPL R8, $0x10
8247  	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm12B
8248  	MOVQ (R9)(R12*1), R11
8249  	MOVQ 8(R9)(R12*1), R13
8250  	XORQ (R10)(R12*1), R11
8251  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
8252  	XORQ 8(R10)(R12*1), R13
8253  	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B
8254  	LEAL -16(R8), R8
8255  	LEAL 16(R12), R12
8256  	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B
8257  
      // The second 8-byte word differs (R13 is the non-zero XOR). The index of its
      // lowest set bit divided by 8 is the count of equal bytes in that word; add
      // the 8 bytes of the first word. TZCNTQ is assembled only when GOAMD64_v3 is
      // defined, BSFQ otherwise.
8258  matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B:
8259  #ifdef GOAMD64_v3
8260  	TZCNTQ R13, R13
8261  
8262  #else
8263  	BSFQ R13, R13
8264  
8265  #endif
8266  	SARQ $0x03, R13
8267  	LEAL 8(R12)(R13*1), R12
8268  	JMP  match_nolit_end_encodeBetterBlockAsm12B
8269  
      // Fewer than 16 bytes remain: one 8-byte comparison if at least 8 remain.
8270  matchlen_match8_match_nolit_encodeBetterBlockAsm12B:
8271  	CMPL R8, $0x08
8272  	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm12B
8273  	MOVQ (R9)(R12*1), R11
8274  	XORQ (R10)(R12*1), R11
8275  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
8276  	LEAL -8(R8), R8
8277  	LEAL 8(R12), R12
8278  	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm12B
8279  
      // An 8-byte word differs (R11 is the non-zero XOR): add the number of equal
      // low-order bytes, computed as lowest-set-bit index / 8.
8280  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B:
8281  #ifdef GOAMD64_v3
8282  	TZCNTQ R11, R11
8283  
8284  #else
8285  	BSFQ R11, R11
8286  
8287  #endif
8288  	SARQ $0x03, R11
8289  	LEAL (R12)(R11*1), R12
8290  	JMP  match_nolit_end_encodeBetterBlockAsm12B
8291  
      // One 4-byte comparison if at least 4 bytes remain.
8292  matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
8293  	CMPL R8, $0x04
8294  	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm12B
8295  	MOVL (R9)(R12*1), R11
8296  	CMPL (R10)(R12*1), R11
8297  	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm12B
8298  	LEAL -4(R8), R8
8299  	LEAL 4(R12), R12
8300  
      // Exactly 1 byte left -> single-byte check; 0 left -> done; otherwise one
      // 2-byte comparison, followed by the single-byte check unless nothing remains.
8301  matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
8302  	CMPL R8, $0x01
8303  	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm12B
8304  	JB   match_nolit_end_encodeBetterBlockAsm12B
8305  	MOVW (R9)(R12*1), R11
8306  	CMPW (R10)(R12*1), R11
8307  	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm12B
8308  	LEAL 2(R12), R12
8309  	SUBL $0x02, R8
8310  	JZ   match_nolit_end_encodeBetterBlockAsm12B
8311  
8312  matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
8313  	MOVB (R9)(R12*1), R11
8314  	CMPB (R10)(R12*1), R11
8315  	JNE  match_nolit_end_encodeBetterBlockAsm12B
8316  	LEAL 1(R12), R12
8317  
      // R8 = match offset = s - candidate (both were advanced by 4, so the
      // difference is that of the match start).
8318  match_nolit_end_encodeBetterBlockAsm12B:
8319  	MOVL DX, R8
8320  	SUBL SI, R8
8321  
8322  	// Check if repeat
      	// Same offset as the previous match: take the repeat path. Otherwise store
      	// the new offset in 16(SP).
8323  	CMPL 16(SP), R8
8324  	JEQ  match_is_repeat_encodeBetterBlockAsm12B
8325  	MOVL R8, 16(SP)
      	// Emit the pending literals src[nextEmit:DI], if any. R9 = length,
      	// R10 = source pointer, SI = length - 1; nextEmit becomes DI.
8326  	MOVL 12(SP), SI
8327  	CMPL SI, DI
8328  	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm12B
8329  	MOVL DI, R9
8330  	MOVL DI, 12(SP)
8331  	LEAQ (BX)(SI*1), R10
8332  	SUBL SI, R9
8333  	LEAL -1(R9), SI
      	// length-1 < 60: one tag byte. length-1 < 256: 0xf0 plus one length byte.
      	// Otherwise: 0xf4 plus a 16-bit length.
      	// NOTE(review): the second JB below tests the same flags as the first, so
      	// it is never taken when reached; control falls through to the next label.
8334  	CMPL SI, $0x3c
8335  	JB   one_byte_match_emit_encodeBetterBlockAsm12B
8336  	CMPL SI, $0x00000100
8337  	JB   two_bytes_match_emit_encodeBetterBlockAsm12B
8338  	JB   three_bytes_match_emit_encodeBetterBlockAsm12B
8339  
      // Header: 0xf4, then length-1 as a 16-bit value (3 bytes).
8340  three_bytes_match_emit_encodeBetterBlockAsm12B:
8341  	MOVB $0xf4, (CX)
8342  	MOVW SI, 1(CX)
8343  	ADDQ $0x03, CX
8344  	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
8345  
      // Header: 0xf0, then length-1 as one byte (2 bytes). length-1 < 64 uses the
      // short copy, anything longer the long copy.
8346  two_bytes_match_emit_encodeBetterBlockAsm12B:
8347  	MOVB $0xf0, (CX)
8348  	MOVB SI, 1(CX)
8349  	ADDQ $0x02, CX
8350  	CMPL SI, $0x40
8351  	JB   memmove_match_emit_encodeBetterBlockAsm12B
8352  	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
8353  
      // Header: single byte (length-1) << 2.
8354  one_byte_match_emit_encodeBetterBlockAsm12B:
8355  	SHLB $0x02, SI
8356  	MOVB SI, (CX)
8357  	ADDQ $0x01, CX
8358  
      // SI = dst pointer just past the literals (CX + length).
8359  memmove_match_emit_encodeBetterBlockAsm12B:
8360  	LEAQ (CX)(R9*1), SI
8361  
8362  	// genMemMoveShort
      	// Dispatch on the length in R9. Lengths up to 4 always copy 4 bytes; the
      	// larger classes copy the first and last chunk, which may overlap. CX is
      	// set from SI afterwards, so only R9 bytes count as written.
8363  	CMPQ R9, $0x04
8364  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
8365  	CMPQ R9, $0x08
8366  	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
8367  	CMPQ R9, $0x10
8368  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
8369  	CMPQ R9, $0x20
8370  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
8371  	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
8372  
8373  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
8374  	MOVL (R10), R11
8375  	MOVL R11, (CX)
8376  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
8377  
8378  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
8379  	MOVL (R10), R11
8380  	MOVL -4(R10)(R9*1), R10
8381  	MOVL R11, (CX)
8382  	MOVL R10, -4(CX)(R9*1)
8383  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
8384  
8385  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
8386  	MOVQ (R10), R11
8387  	MOVQ -8(R10)(R9*1), R10
8388  	MOVQ R11, (CX)
8389  	MOVQ R10, -8(CX)(R9*1)
8390  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
8391  
8392  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
8393  	MOVOU (R10), X0
8394  	MOVOU -16(R10)(R9*1), X1
8395  	MOVOU X0, (CX)
8396  	MOVOU X1, -16(CX)(R9*1)
8397  	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm12B
8398  
8399  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
8400  	MOVOU (R10), X0
8401  	MOVOU 16(R10), X1
8402  	MOVOU -32(R10)(R9*1), X2
8403  	MOVOU -16(R10)(R9*1), X3
8404  	MOVOU X0, (CX)
8405  	MOVOU X1, 16(CX)
8406  	MOVOU X2, -32(CX)(R9*1)
8407  	MOVOU X3, -16(CX)(R9*1)
8408  
      // CX = dst pointer past the copied literals.
8409  memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
8410  	MOVQ SI, CX
8411  	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm12B
8412  
      // Long literal copy. SI = dst pointer just past the literals.
8413  memmove_long_match_emit_encodeBetterBlockAsm12B:
8414  	LEAQ (CX)(R9*1), SI
8415  
8416  	// genMemMoveLong
      	// X0..X3 hold the first 32 and last 32 source bytes; they are stored after
      	// the loop so the head and tail are written with unaligned stores.
8417  	MOVOU (R10), X0
8418  	MOVOU 16(R10), X1
8419  	MOVOU -32(R10)(R9*1), X2
8420  	MOVOU -16(R10)(R9*1), X3
      	// R13 = length / 32. R14 = 64 - (CX & 31), so CX + R14 - 32 is a multiple
      	// of 32; the loops below store there with MOVOA.
8421  	MOVQ  R9, R13
8422  	SHRQ  $0x05, R13
8423  	MOVQ  CX, R11
8424  	ANDL  $0x0000001f, R11
8425  	MOVQ  $0x00000040, R14
8426  	SUBQ  R11, R14
      	// NOTE(review): DECQ does not write CF, so JA sees CF from the SUBQ above
      	// (no borrow, since R11 <= 31) and ZF from DECQ; it is taken whenever
      	// R13 - 1 is non-zero.
8427  	DECQ  R13
8428  	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
8429  	LEAQ  -32(R10)(R14*1), R11
8430  	LEAQ  -32(CX)(R14*1), R15
8431  
      // NOTE(review): JNA here repeats only when DECQ leaves R13 at zero (or the
      // ADDQ set CF) -- confirm this is the intended loop condition.
8432  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
8433  	MOVOU (R11), X4
8434  	MOVOU 16(R11), X5
8435  	MOVOA X4, (R15)
8436  	MOVOA X5, 16(R15)
8437  	ADDQ  $0x20, R15
8438  	ADDQ  $0x20, R11
8439  	ADDQ  $0x20, R14
8440  	DECQ  R13
8441  	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
8442  
      // Copy 32 bytes per iteration at offset R14 - 32 while length >= R14, then
      // store the saved head (X0, X1) and tail (X2, X3) and set CX past the literals.
8443  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
8444  	MOVOU -32(R10)(R14*1), X4
8445  	MOVOU -16(R10)(R14*1), X5
8446  	MOVOA X4, -32(CX)(R14*1)
8447  	MOVOA X5, -16(CX)(R14*1)
8448  	ADDQ  $0x20, R14
8449  	CMPQ  R9, R14
8450  	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
8451  	MOVOU X0, (CX)
8452  	MOVOU X1, 16(CX)
8453  	MOVOU X2, -32(CX)(R9*1)
8454  	MOVOU X3, -16(CX)(R9*1)
8455  	MOVQ  SI, CX
8456  
      // Literals are out. s += R12 moves s to the end of the match (s already
      // includes the first 4 bytes); R12 += 4 makes R12 the full match length;
      // nextEmit = s.
8457  emit_literal_done_match_emit_encodeBetterBlockAsm12B:
8458  	ADDL R12, DX
8459  	ADDL $0x04, R12
8460  	MOVL DX, 12(SP)
8461  
8462  	// emitCopy
      	// R12 = length, R8 = offset.
      	//   length <= 64                 -> two_byte_offset_short
      	//   length >  64, offset >= 2048 -> long_offset_short
      	//   length >  64, offset <  2048 -> fall through: write a 2-byte copy that
      	//     accounts for 8 bytes (tag 0x11 | (offset >> 8) << 5, then the low
      	//     offset byte) and encode the rest as a repeat.
8463  	CMPL R12, $0x40
8464  	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
8465  	CMPL R8, $0x00000800
8466  	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm12B
8467  	MOVL $0x00000001, SI
8468  	LEAL 16(SI), SI
8469  	MOVB R8, 1(CX)
8470  	SHRL $0x08, R8
8471  	SHLL $0x05, R8
8472  	ORL  R8, SI
8473  	MOVB SI, (CX)
8474  	ADDQ $0x02, CX
8475  	SUBL $0x08, R12
8476  
8477  	// emitRepeat
      	// R12 = remaining length - 4; jump straight to the 3/4-byte repeat forms.
      	// NOTE(review): the six instructions after the JMP carry no label, so they
      	// appear unreachable, as do the repeat_two*_emit_copy_short_2b blocks that
      	// only they reference within this function.
8478  	LEAL -4(R12), R12
8479  	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
8480  	MOVL R12, SI
8481  	LEAL -4(R12), R12
8482  	CMPL SI, $0x08
8483  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
8484  	CMPL SI, $0x0c
8485  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
8486  	CMPL R8, $0x00000800
8487  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
8488  
      // R12 = remaining length - 4. Below 260: three-byte form. Otherwise the
      // four-byte form: bytes 0x19, 0x00, then R12 - 256 as a 16-bit value.
8489  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
8490  	CMPL R12, $0x00000104
8491  	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
8492  	LEAL -256(R12), R12
8493  	MOVW $0x0019, (CX)
8494  	MOVW R12, 2(CX)
8495  	ADDQ $0x04, CX
8496  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8497  
      // Three-byte form: bytes 0x15, 0x00, then R12 - 4 as one byte.
8498  repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
8499  	LEAL -4(R12), R12
8500  	MOVW $0x0015, (CX)
8501  	MOVB R12, 2(CX)
8502  	ADDQ $0x03, CX
8503  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8504  
      // Two-byte form: 16-bit store of (R12 << 2) | 1.
8505  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
8506  	SHLL $0x02, R12
8507  	ORL  $0x01, R12
8508  	MOVW R12, (CX)
8509  	ADDQ $0x02, CX
8510  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8511  
      // Two-byte form with offset: tag = 1 + R12*4, OR-ed with (offset >> 8) << 5;
      // the second byte is the low offset byte.
8512  repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
8513  	XORQ SI, SI
8514  	LEAL 1(SI)(R12*4), R12
8515  	MOVB R8, 1(CX)
8516  	SARL $0x08, R8
8517  	SHLL $0x05, R8
8518  	ORL  R8, R12
8519  	MOVB R12, (CX)
8520  	ADDQ $0x02, CX
8521  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8522  
      // length > 64 and offset >= 2048: write 0xee followed by the 16-bit offset
      // (3 bytes), which accounts for 60 bytes of the match; the remaining length
      // is encoded as a repeat.
8523  long_offset_short_match_nolit_encodeBetterBlockAsm12B:
8524  	MOVB $0xee, (CX)
8525  	MOVW R8, 1(CX)
8526  	LEAL -60(R12), R12
8527  	ADDQ $0x03, CX
8528  
8529  	// emitRepeat
      	// SI = remaining length, R12 = remaining length - 4.
      	//   length <= 8                   -> two-byte form
      	//   length < 12 and offset < 2048 -> two-byte form with offset
      	//   otherwise                     -> three- or four-byte form
8530  	MOVL R12, SI
8531  	LEAL -4(R12), R12
8532  	CMPL SI, $0x08
8533  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
8534  	CMPL SI, $0x0c
8535  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
8536  	CMPL R8, $0x00000800
8537  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
8538  
      // R12 = remaining length - 4. Below 260: three-byte form. Otherwise the
      // four-byte form: bytes 0x19, 0x00, then R12 - 256 as a 16-bit value.
8539  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
8540  	CMPL R12, $0x00000104
8541  	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
8542  	LEAL -256(R12), R12
8543  	MOVW $0x0019, (CX)
8544  	MOVW R12, 2(CX)
8545  	ADDQ $0x04, CX
8546  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8547  
      // Three-byte form: bytes 0x15, 0x00, then R12 - 4 as one byte.
8548  repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
8549  	LEAL -4(R12), R12
8550  	MOVW $0x0015, (CX)
8551  	MOVB R12, 2(CX)
8552  	ADDQ $0x03, CX
8553  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8554  
      // Two-byte form: 16-bit store of (R12 << 2) | 1.
8555  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
8556  	SHLL $0x02, R12
8557  	ORL  $0x01, R12
8558  	MOVW R12, (CX)
8559  	ADDQ $0x02, CX
8560  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8561  
      // Two-byte form with offset: tag = 1 + R12*4, OR-ed with (offset >> 8) << 5;
      // the second byte is the low offset byte.
8562  repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
8563  	XORQ SI, SI
8564  	LEAL 1(SI)(R12*4), R12
8565  	MOVB R8, 1(CX)
8566  	SARL $0x08, R8
8567  	SHLL $0x05, R8
8568  	ORL  R8, R12
8569  	MOVB R12, (CX)
8570  	ADDQ $0x02, CX
8571  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8572  
      // length <= 64. SI = length << 2.
      //   length < 12 and offset < 2048: two bytes, tag = (SI - 15) OR-ed with
      //     (offset >> 8) << 5, then the low offset byte.
      //   otherwise: three bytes, see emit_copy_three below.
8573  two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
8574  	MOVL R12, SI
8575  	SHLL $0x02, SI
8576  	CMPL R12, $0x0c
8577  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
8578  	CMPL R8, $0x00000800
8579  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
8580  	LEAL -15(SI), SI
8581  	MOVB R8, 1(CX)
8582  	SHRL $0x08, R8
8583  	SHLL $0x05, R8
8584  	ORL  R8, SI
8585  	MOVB SI, (CX)
8586  	ADDQ $0x02, CX
8587  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8588  
      // Three bytes: tag = (length << 2) - 2, then the 16-bit offset.
8589  emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
8590  	LEAL -2(SI), SI
8591  	MOVB SI, (CX)
8592  	MOVW R8, 1(CX)
8593  	ADDQ $0x03, CX
8594  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8595  
      // The offset equals the previous match offset. Emit the pending literals
      // src[nextEmit:DI] (same header and copy scheme as the non-repeat path), then
      // encode the match as a repeat.
8596  match_is_repeat_encodeBetterBlockAsm12B:
8597  	MOVL 12(SP), SI
8598  	CMPL SI, DI
8599  	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
8600  	MOVL DI, R9
8601  	MOVL DI, 12(SP)
8602  	LEAQ (BX)(SI*1), R10
8603  	SUBL SI, R9
8604  	LEAL -1(R9), SI
      	// NOTE(review): as above, the second JB is never taken when reached.
8605  	CMPL SI, $0x3c
8606  	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm12B
8607  	CMPL SI, $0x00000100
8608  	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
8609  	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm12B
8610  
      // Header: 0xf4, then length-1 as a 16-bit value (3 bytes).
8611  three_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
8612  	MOVB $0xf4, (CX)
8613  	MOVW SI, 1(CX)
8614  	ADDQ $0x03, CX
8615  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
8616  
      // Header: 0xf0, then length-1 as one byte (2 bytes).
8617  two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
8618  	MOVB $0xf0, (CX)
8619  	MOVB SI, 1(CX)
8620  	ADDQ $0x02, CX
8621  	CMPL SI, $0x40
8622  	JB   memmove_match_emit_repeat_encodeBetterBlockAsm12B
8623  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
8624  
      // Header: single byte (length-1) << 2.
8625  one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
8626  	SHLB $0x02, SI
8627  	MOVB SI, (CX)
8628  	ADDQ $0x01, CX
8629  
      // SI = dst pointer just past the literals (CX + length).
8630  memmove_match_emit_repeat_encodeBetterBlockAsm12B:
8631  	LEAQ (CX)(R9*1), SI
8632  
8633  	// genMemMoveShort
      	// Dispatch on the length in R9; lengths up to 4 always copy 4 bytes.
8634  	CMPQ R9, $0x04
8635  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
8636  	CMPQ R9, $0x08
8637  	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
8638  	CMPQ R9, $0x10
8639  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
8640  	CMPQ R9, $0x20
8641  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
8642  	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
8643  
8644  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
8645  	MOVL (R10), R11
8646  	MOVL R11, (CX)
8647  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
8648  
8649  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
8650  	MOVL (R10), R11
8651  	MOVL -4(R10)(R9*1), R10
8652  	MOVL R11, (CX)
8653  	MOVL R10, -4(CX)(R9*1)
8654  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
8655  
8656  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
8657  	MOVQ (R10), R11
8658  	MOVQ -8(R10)(R9*1), R10
8659  	MOVQ R11, (CX)
8660  	MOVQ R10, -8(CX)(R9*1)
8661  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
8662  
8663  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
8664  	MOVOU (R10), X0
8665  	MOVOU -16(R10)(R9*1), X1
8666  	MOVOU X0, (CX)
8667  	MOVOU X1, -16(CX)(R9*1)
8668  	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
8669  
8670  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
8671  	MOVOU (R10), X0
8672  	MOVOU 16(R10), X1
8673  	MOVOU -32(R10)(R9*1), X2
8674  	MOVOU -16(R10)(R9*1), X3
8675  	MOVOU X0, (CX)
8676  	MOVOU X1, 16(CX)
8677  	MOVOU X2, -32(CX)(R9*1)
8678  	MOVOU X3, -16(CX)(R9*1)
8679  
      // CX = dst pointer past the copied literals.
8680  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
8681  	MOVQ SI, CX
8682  	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
8683  
      // Long literal copy. SI = dst pointer just past the literals.
8684  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
8685  	LEAQ (CX)(R9*1), SI
8686  
8687  	// genMemMoveLong
      	// X0..X3 = first and last 32 source bytes (stored after the loop).
      	// R13 = length / 32; R14 = 64 - (CX & 31), so CX + R14 - 32 is a multiple
      	// of 32 for the MOVOA stores.
8688  	MOVOU (R10), X0
8689  	MOVOU 16(R10), X1
8690  	MOVOU -32(R10)(R9*1), X2
8691  	MOVOU -16(R10)(R9*1), X3
8692  	MOVQ  R9, R13
8693  	SHRQ  $0x05, R13
8694  	MOVQ  CX, R11
8695  	ANDL  $0x0000001f, R11
8696  	MOVQ  $0x00000040, R14
8697  	SUBQ  R11, R14
      	// NOTE(review): JA uses CF from the SUBQ (DECQ leaves CF alone) and ZF
      	// from DECQ; it is taken whenever R13 - 1 is non-zero.
8698  	DECQ  R13
8699  	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
8700  	LEAQ  -32(R10)(R14*1), R11
8701  	LEAQ  -32(CX)(R14*1), R15
8702  
      // NOTE(review): JNA repeats only when DECQ leaves R13 at zero (or the ADDQ
      // set CF) -- confirm this is the intended loop condition.
8703  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
8704  	MOVOU (R11), X4
8705  	MOVOU 16(R11), X5
8706  	MOVOA X4, (R15)
8707  	MOVOA X5, 16(R15)
8708  	ADDQ  $0x20, R15
8709  	ADDQ  $0x20, R11
8710  	ADDQ  $0x20, R14
8711  	DECQ  R13
8712  	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
8713  
      // Copy 32 bytes per iteration at offset R14 - 32 while length >= R14, then
      // store the saved head and tail and set CX past the literals.
8714  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
8715  	MOVOU -32(R10)(R14*1), X4
8716  	MOVOU -16(R10)(R14*1), X5
8717  	MOVOA X4, -32(CX)(R14*1)
8718  	MOVOA X5, -16(CX)(R14*1)
8719  	ADDQ  $0x20, R14
8720  	CMPQ  R9, R14
8721  	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
8722  	MOVOU X0, (CX)
8723  	MOVOU X1, 16(CX)
8724  	MOVOU X2, -32(CX)(R9*1)
8725  	MOVOU X3, -16(CX)(R9*1)
8726  	MOVQ  SI, CX
8727  
      // Literals are out. s moves to the end of the match, R12 becomes the full
      // match length, nextEmit = s.
8728  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
8729  	ADDL R12, DX
8730  	ADDL $0x04, R12
8731  	MOVL DX, 12(SP)
8732  
8733  	// emitRepeat
      	// SI = length, R12 = length - 4, R8 = offset.
      	//   length <= 8                   -> two-byte form
      	//   length < 12 and offset < 2048 -> two-byte form with offset
      	//   otherwise                     -> three- or four-byte form
8734  	MOVL R12, SI
8735  	LEAL -4(R12), R12
8736  	CMPL SI, $0x08
8737  	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
8738  	CMPL SI, $0x0c
8739  	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
8740  	CMPL R8, $0x00000800
8741  	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
8742  
      // R12 = length - 4. Below 260: three-byte form. Otherwise the four-byte
      // form: bytes 0x19, 0x00, then R12 - 256 as a 16-bit value.
8743  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
8744  	CMPL R12, $0x00000104
8745  	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
8746  	LEAL -256(R12), R12
8747  	MOVW $0x0019, (CX)
8748  	MOVW R12, 2(CX)
8749  	ADDQ $0x04, CX
8750  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8751  
      // Three-byte form: bytes 0x15, 0x00, then R12 - 4 as one byte.
8752  repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
8753  	LEAL -4(R12), R12
8754  	MOVW $0x0015, (CX)
8755  	MOVB R12, 2(CX)
8756  	ADDQ $0x03, CX
8757  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8758  
      // Two-byte form: 16-bit store of (R12 << 2) | 1.
8759  repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
8760  	SHLL $0x02, R12
8761  	ORL  $0x01, R12
8762  	MOVW R12, (CX)
8763  	ADDQ $0x02, CX
8764  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
8765  
      // Two-byte form with offset: tag = 1 + R12*4, OR-ed with (offset >> 8) << 5;
      // the second byte is the low offset byte. Falls through to the end label.
8766  repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
8767  	XORQ SI, SI
8768  	LEAL 1(SI)(R12*4), R12
8769  	MOVB R8, 1(CX)
8770  	SARL $0x08, R8
8771  	SHLL $0x05, R8
8772  	ORL  R8, R12
8773  	MOVB R12, (CX)
8774  	ADDQ $0x02, CX
8775  
      // The match has been written. Go to the tail handling if s reached the search
      // limit; return 0 if the dst pointer reached the dst limit.
8776  match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
8777  	CMPL DX, 8(SP)
8778  	JAE  emit_remainder_encodeBetterBlockAsm12B
8779  	CMPQ CX, (SP)
8780  	JB   match_nolit_dst_ok_encodeBetterBlockAsm12B
8781  	MOVQ $0x00000000, ret+56(FP)
8782  	RET
8783  
      // Index positions inside the match just written (DI still holds its start).
      // DI = start + 1 and R9 = s - 2 are stored in the long table under the hash
      // of the bytes at those positions; DI + 1 and R9 + 1 are stored in the short
      // table under the hash of the bytes one position further on.
8784  match_nolit_dst_ok_encodeBetterBlockAsm12B:
8785  	MOVQ  $0x0000cf1bbcdcbf9b, SI
8786  	MOVQ  $0x9e3779b1, R8
8787  	LEAQ  1(DI), DI
8788  	LEAQ  -2(DX), R9
8789  	MOVQ  (BX)(DI*1), R10
8790  	MOVQ  1(BX)(DI*1), R11
8791  	MOVQ  (BX)(R9*1), R12
8792  	MOVQ  1(BX)(R9*1), R13
8793  	SHLQ  $0x10, R10
8794  	IMULQ SI, R10
8795  	SHRQ  $0x32, R10
8796  	SHLQ  $0x20, R11
8797  	IMULQ R8, R11
8798  	SHRQ  $0x34, R11
8799  	SHLQ  $0x10, R12
8800  	IMULQ SI, R12
8801  	SHRQ  $0x32, R12
8802  	SHLQ  $0x20, R13
8803  	IMULQ R8, R13
8804  	SHRQ  $0x34, R13
8805  	LEAQ  1(DI), R8
8806  	LEAQ  1(R9), R14
8807  	MOVL  DI, (AX)(R10*4)
8808  	MOVL  R9, (AX)(R12*4)
8809  	MOVL  R8, 65536(AX)(R11*4)
8810  	MOVL  R14, 65536(AX)(R13*4)
      	// R8 = (DI + R9 + 1) >> 1, the midpoint; then DI += 1 and R9 -= 1.
8811  	LEAQ  1(R9)(DI*1), R8
8812  	SHRQ  $0x01, R8
8813  	ADDQ  $0x01, DI
8814  	SUBQ  $0x01, R9
8815  
      // While R8 < R9: store DI and R8 in the long table under the hash of the
      // bytes at those positions, then advance both by 2. Afterwards resume the
      // search at s (DX).
8816  index_loop_encodeBetterBlockAsm12B:
8817  	CMPQ  R8, R9
8818  	JAE   search_loop_encodeBetterBlockAsm12B
8819  	MOVQ  (BX)(DI*1), R10
8820  	MOVQ  (BX)(R8*1), R11
8821  	SHLQ  $0x10, R10
8822  	IMULQ SI, R10
8823  	SHRQ  $0x32, R10
8824  	SHLQ  $0x10, R11
8825  	IMULQ SI, R11
8826  	SHRQ  $0x32, R11
8827  	MOVL  DI, (AX)(R10*4)
8828  	MOVL  R8, (AX)(R11*4)
8829  	ADDQ  $0x02, DI
8830  	ADDQ  $0x02, R8
8831  	JMP   index_loop_encodeBetterBlockAsm12B
8832  
      // Tail handling. Return 0 unless CX + 3 + (src_len - nextEmit) is below the
      // dst limit in (SP).
8833  emit_remainder_encodeBetterBlockAsm12B:
8834  	MOVQ src_len+32(FP), AX
8835  	SUBL 12(SP), AX
8836  	LEAQ 3(CX)(AX*1), AX
8837  	CMPQ AX, (SP)
8838  	JB   emit_remainder_ok_encodeBetterBlockAsm12B
8839  	MOVQ $0x00000000, ret+56(FP)
8840  	RET
8841  
      // Emit src[nextEmit:src_len] as literals; nothing to do when nextEmit already
      // equals src_len. AX = source pointer, SI = length, DX = length - 1.
8842  emit_remainder_ok_encodeBetterBlockAsm12B:
8843  	MOVQ src_len+32(FP), AX
8844  	MOVL 12(SP), DX
8845  	CMPL DX, AX
8846  	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
8847  	MOVL AX, SI
8848  	MOVL AX, 12(SP)
8849  	LEAQ (BX)(DX*1), AX
8850  	SUBL DX, SI
8851  	LEAL -1(SI), DX
      	// Header selection as for the match literals.
      	// NOTE(review): the second JB is never taken when reached.
8852  	CMPL DX, $0x3c
8853  	JB   one_byte_emit_remainder_encodeBetterBlockAsm12B
8854  	CMPL DX, $0x00000100
8855  	JB   two_bytes_emit_remainder_encodeBetterBlockAsm12B
8856  	JB   three_bytes_emit_remainder_encodeBetterBlockAsm12B
8857  
      // Header: 0xf4, then length-1 as a 16-bit value (3 bytes).
8858  three_bytes_emit_remainder_encodeBetterBlockAsm12B:
8859  	MOVB $0xf4, (CX)
8860  	MOVW DX, 1(CX)
8861  	ADDQ $0x03, CX
8862  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
8863  
      // Header: 0xf0, then length-1 as one byte (2 bytes).
8864  two_bytes_emit_remainder_encodeBetterBlockAsm12B:
8865  	MOVB $0xf0, (CX)
8866  	MOVB DL, 1(CX)
8867  	ADDQ $0x02, CX
8868  	CMPL DX, $0x40
8869  	JB   memmove_emit_remainder_encodeBetterBlockAsm12B
8870  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
8871  
      // Header: single byte (length-1) << 2.
8872  one_byte_emit_remainder_encodeBetterBlockAsm12B:
8873  	SHLB $0x02, DL
8874  	MOVB DL, (CX)
8875  	ADDQ $0x01, CX
8876  
      // DX = dst pointer just past the literals; BX is reused as the length from
      // here on (the src base is not read again).
8877  memmove_emit_remainder_encodeBetterBlockAsm12B:
8878  	LEAQ (CX)(SI*1), DX
8879  	MOVL SI, BX
8880  
8881  	// genMemMoveShort
      	// Dispatch on the length in BX. Unlike the match-literal copies, lengths
      	// 1-2 and 3 have their own cases here.
8882  	CMPQ BX, $0x03
8883  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2
8884  	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3
8885  	CMPQ BX, $0x08
8886  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7
8887  	CMPQ BX, $0x10
8888  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16
8889  	CMPQ BX, $0x20
8890  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32
8891  	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
8892  
      // Copies the first and the last byte (the same byte when the length is 1).
8893  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
8894  	MOVB (AX), SI
8895  	MOVB -1(AX)(BX*1), AL
8896  	MOVB SI, (CX)
8897  	MOVB AL, -1(CX)(BX*1)
8898  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
8899  
8900  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
8901  	MOVW (AX), SI
8902  	MOVB 2(AX), AL
8903  	MOVW SI, (CX)
8904  	MOVB AL, 2(CX)
8905  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
8906  
8907  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
8908  	MOVL (AX), SI
8909  	MOVL -4(AX)(BX*1), AX
8910  	MOVL SI, (CX)
8911  	MOVL AX, -4(CX)(BX*1)
8912  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
8913  
8914  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
8915  	MOVQ (AX), SI
8916  	MOVQ -8(AX)(BX*1), AX
8917  	MOVQ SI, (CX)
8918  	MOVQ AX, -8(CX)(BX*1)
8919  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
8920  
8921  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
8922  	MOVOU (AX), X0
8923  	MOVOU -16(AX)(BX*1), X1
8924  	MOVOU X0, (CX)
8925  	MOVOU X1, -16(CX)(BX*1)
8926  	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
8927  
8928  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
8929  	MOVOU (AX), X0
8930  	MOVOU 16(AX), X1
8931  	MOVOU -32(AX)(BX*1), X2
8932  	MOVOU -16(AX)(BX*1), X3
8933  	MOVOU X0, (CX)
8934  	MOVOU X1, 16(CX)
8935  	MOVOU X2, -32(CX)(BX*1)
8936  	MOVOU X3, -16(CX)(BX*1)
8937  
      // CX = dst pointer past the copied literals.
8938  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
8939  	MOVQ DX, CX
8940  	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
8941  
      // Long literal copy for the tail. DX = dst pointer just past the literals,
      // BX = length.
8942  memmove_long_emit_remainder_encodeBetterBlockAsm12B:
8943  	LEAQ (CX)(SI*1), DX
8944  	MOVL SI, BX
8945  
8946  	// genMemMoveLong
      	// X0..X3 = first and last 32 source bytes (stored after the loop).
      	// DI = length / 32; R8 = 64 - (CX & 31), so CX + R8 - 32 is a multiple of
      	// 32 for the MOVOA stores.
8947  	MOVOU (AX), X0
8948  	MOVOU 16(AX), X1
8949  	MOVOU -32(AX)(BX*1), X2
8950  	MOVOU -16(AX)(BX*1), X3
8951  	MOVQ  BX, DI
8952  	SHRQ  $0x05, DI
8953  	MOVQ  CX, SI
8954  	ANDL  $0x0000001f, SI
8955  	MOVQ  $0x00000040, R8
8956  	SUBQ  SI, R8
      	// NOTE(review): JA uses CF from the SUBQ (DECQ leaves CF alone) and ZF
      	// from DECQ; it is taken whenever DI - 1 is non-zero.
8957  	DECQ  DI
8958  	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
8959  	LEAQ  -32(AX)(R8*1), SI
8960  	LEAQ  -32(CX)(R8*1), R9
8961  
      // NOTE(review): JNA repeats only when DECQ leaves DI at zero (or the ADDQ
      // set CF) -- confirm this is the intended loop condition.
8962  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
8963  	MOVOU (SI), X4
8964  	MOVOU 16(SI), X5
8965  	MOVOA X4, (R9)
8966  	MOVOA X5, 16(R9)
8967  	ADDQ  $0x20, R9
8968  	ADDQ  $0x20, SI
8969  	ADDQ  $0x20, R8
8970  	DECQ  DI
8971  	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
8972  
      // Copy 32 bytes per iteration at offset R8 - 32 while length >= R8, then
      // store the saved head and tail and set CX past the literals.
8973  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
8974  	MOVOU -32(AX)(R8*1), X4
8975  	MOVOU -16(AX)(R8*1), X5
8976  	MOVOA X4, -32(CX)(R8*1)
8977  	MOVOA X5, -16(CX)(R8*1)
8978  	ADDQ  $0x20, R8
8979  	CMPQ  BX, R8
8980  	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
8981  	MOVOU X0, (CX)
8982  	MOVOU X1, 16(CX)
8983  	MOVOU X2, -32(CX)(BX*1)
8984  	MOVOU X3, -16(CX)(BX*1)
8985  	MOVQ  DX, CX
8986  
      // Return the number of bytes written: CX - dst_base.
8987  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
8988  	MOVQ dst_base+0(FP), AX
8989  	SUBQ AX, CX
8990  	MOVQ CX, ret+56(FP)
8991  	RET
8992  
8993  // func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
8994  // Requires: BMI, SSE2
8995  TEXT ·encodeBetterBlockAsm10B(SB), $24-64
8996  	MOVQ tmp+48(FP), AX
8997  	MOVQ dst_base+0(FP), CX
8998  	MOVQ $0x000000a0, DX
8999  	MOVQ AX, BX
9000  	PXOR X0, X0
9001  
9002  zero_loop_encodeBetterBlockAsm10B:
9003  	MOVOU X0, (BX)
9004  	MOVOU X0, 16(BX)
9005  	MOVOU X0, 32(BX)
9006  	MOVOU X0, 48(BX)
9007  	MOVOU X0, 64(BX)
9008  	MOVOU X0, 80(BX)
9009  	MOVOU X0, 96(BX)
9010  	MOVOU X0, 112(BX)
9011  	ADDQ  $0x80, BX
9012  	DECQ  DX
9013  	JNZ   zero_loop_encodeBetterBlockAsm10B
9014  	MOVL  $0x00000000, 12(SP)
9015  	MOVQ  src_len+32(FP), DX
9016  	LEAQ  -6(DX), BX
9017  	LEAQ  -8(DX), SI
9018  	MOVL  SI, 8(SP)
9019  	SHRQ  $0x05, DX
9020  	SUBL  DX, BX
9021  	LEAQ  (CX)(BX*1), BX
9022  	MOVQ  BX, (SP)
9023  	MOVL  $0x00000001, DX
9024  	MOVL  $0x00000000, 16(SP)
9025  	MOVQ  src_base+24(FP), BX
9026  
9027  search_loop_encodeBetterBlockAsm10B:
9028  	MOVL  DX, SI
9029  	SUBL  12(SP), SI
9030  	SHRL  $0x05, SI
9031  	LEAL  1(DX)(SI*1), SI
9032  	CMPL  SI, 8(SP)
9033  	JAE   emit_remainder_encodeBetterBlockAsm10B
9034  	MOVQ  (BX)(DX*1), DI
9035  	MOVL  SI, 20(SP)
9036  	MOVQ  $0x0000cf1bbcdcbf9b, R9
9037  	MOVQ  $0x9e3779b1, SI
9038  	MOVQ  DI, R10
9039  	MOVQ  DI, R11
9040  	SHLQ  $0x10, R10
9041  	IMULQ R9, R10
9042  	SHRQ  $0x34, R10
9043  	SHLQ  $0x20, R11
9044  	IMULQ SI, R11
9045  	SHRQ  $0x36, R11
9046  	MOVL  (AX)(R10*4), SI
9047  	MOVL  16384(AX)(R11*4), R8
9048  	MOVL  DX, (AX)(R10*4)
9049  	MOVL  DX, 16384(AX)(R11*4)
9050  	MOVQ  (BX)(SI*1), R10
9051  	MOVQ  (BX)(R8*1), R11
9052  	CMPQ  R10, DI
9053  	JEQ   candidate_match_encodeBetterBlockAsm10B
9054  	CMPQ  R11, DI
9055  	JNE   no_short_found_encodeBetterBlockAsm10B
9056  	MOVL  R8, SI
9057  	JMP   candidate_match_encodeBetterBlockAsm10B
9058  
9059  no_short_found_encodeBetterBlockAsm10B:
9060  	CMPL R10, DI
9061  	JEQ  candidate_match_encodeBetterBlockAsm10B
9062  	CMPL R11, DI
9063  	JEQ  candidateS_match_encodeBetterBlockAsm10B
9064  	MOVL 20(SP), DX
9065  	JMP  search_loop_encodeBetterBlockAsm10B
9066  
9067  candidateS_match_encodeBetterBlockAsm10B:
9068  	SHRQ  $0x08, DI
9069  	MOVQ  DI, R10
9070  	SHLQ  $0x10, R10
9071  	IMULQ R9, R10
9072  	SHRQ  $0x34, R10
9073  	MOVL  (AX)(R10*4), SI
9074  	INCL  DX
9075  	MOVL  DX, (AX)(R10*4)
9076  	CMPL  (BX)(SI*1), DI
9077  	JEQ   candidate_match_encodeBetterBlockAsm10B
9078  	DECL  DX
9079  	MOVL  R8, SI
9080  
9081  candidate_match_encodeBetterBlockAsm10B:
9082  	MOVL  12(SP), DI
9083  	TESTL SI, SI
9084  	JZ    match_extend_back_end_encodeBetterBlockAsm10B
9085  
9086  match_extend_back_loop_encodeBetterBlockAsm10B:
9087  	CMPL DX, DI
9088  	JBE  match_extend_back_end_encodeBetterBlockAsm10B
9089  	MOVB -1(BX)(SI*1), R8
9090  	MOVB -1(BX)(DX*1), R9
9091  	CMPB R8, R9
9092  	JNE  match_extend_back_end_encodeBetterBlockAsm10B
9093  	LEAL -1(DX), DX
9094  	DECL SI
9095  	JZ   match_extend_back_end_encodeBetterBlockAsm10B
9096  	JMP  match_extend_back_loop_encodeBetterBlockAsm10B
9097  
9098  match_extend_back_end_encodeBetterBlockAsm10B:
9099  	MOVL DX, DI
9100  	SUBL 12(SP), DI
9101  	LEAQ 3(CX)(DI*1), DI
9102  	CMPQ DI, (SP)
9103  	JB   match_dst_size_check_encodeBetterBlockAsm10B
9104  	MOVQ $0x00000000, ret+56(FP)
9105  	RET
9106  
9107  match_dst_size_check_encodeBetterBlockAsm10B:
9108  	MOVL DX, DI
9109  	ADDL $0x04, DX
9110  	ADDL $0x04, SI
9111  	MOVQ src_len+32(FP), R8
9112  	SUBL DX, R8
9113  	LEAQ (BX)(DX*1), R9
9114  	LEAQ (BX)(SI*1), R10
9115  
9116  	// matchLen
9117  	XORL R12, R12
9118  
9119  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B:
9120  	CMPL R8, $0x10
9121  	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm10B
9122  	MOVQ (R9)(R12*1), R11
9123  	MOVQ 8(R9)(R12*1), R13
9124  	XORQ (R10)(R12*1), R11
9125  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
9126  	XORQ 8(R10)(R12*1), R13
9127  	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B
9128  	LEAL -16(R8), R8
9129  	LEAL 16(R12), R12
9130  	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B
9131  
9132  matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B:
9133  #ifdef GOAMD64_v3
9134  	TZCNTQ R13, R13
9135  
9136  #else
9137  	BSFQ R13, R13
9138  
9139  #endif
9140  	SARQ $0x03, R13
9141  	LEAL 8(R12)(R13*1), R12
9142  	JMP  match_nolit_end_encodeBetterBlockAsm10B
9143  
9144  matchlen_match8_match_nolit_encodeBetterBlockAsm10B:
9145  	CMPL R8, $0x08
9146  	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm10B
9147  	MOVQ (R9)(R12*1), R11
9148  	XORQ (R10)(R12*1), R11
9149  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
9150  	LEAL -8(R8), R8
9151  	LEAL 8(R12), R12
9152  	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm10B
9153  
9154  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B:
9155  #ifdef GOAMD64_v3
9156  	TZCNTQ R11, R11
9157  
9158  #else
9159  	BSFQ R11, R11
9160  
9161  #endif
9162  	SARQ $0x03, R11
9163  	LEAL (R12)(R11*1), R12
9164  	JMP  match_nolit_end_encodeBetterBlockAsm10B
9165  
9166  matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
9167  	CMPL R8, $0x04
9168  	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm10B
9169  	MOVL (R9)(R12*1), R11
9170  	CMPL (R10)(R12*1), R11
9171  	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm10B
9172  	LEAL -4(R8), R8
9173  	LEAL 4(R12), R12
9174  
9175  matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
9176  	CMPL R8, $0x01
9177  	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm10B
9178  	JB   match_nolit_end_encodeBetterBlockAsm10B
9179  	MOVW (R9)(R12*1), R11
9180  	CMPW (R10)(R12*1), R11
9181  	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm10B
9182  	LEAL 2(R12), R12
9183  	SUBL $0x02, R8
9184  	JZ   match_nolit_end_encodeBetterBlockAsm10B
9185  
9186  matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
9187  	MOVB (R9)(R12*1), R11
9188  	CMPB (R10)(R12*1), R11
9189  	JNE  match_nolit_end_encodeBetterBlockAsm10B
9190  	LEAL 1(R12), R12
9191  
9192  match_nolit_end_encodeBetterBlockAsm10B:
9193  	MOVL DX, R8
9194  	SUBL SI, R8
9195  
9196  	// Check if repeat
9197  	CMPL 16(SP), R8
9198  	JEQ  match_is_repeat_encodeBetterBlockAsm10B
9199  	MOVL R8, 16(SP)
9200  	MOVL 12(SP), SI
9201  	CMPL SI, DI
9202  	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm10B
9203  	MOVL DI, R9
9204  	MOVL DI, 12(SP)
9205  	LEAQ (BX)(SI*1), R10
9206  	SUBL SI, R9
9207  	LEAL -1(R9), SI
9208  	CMPL SI, $0x3c
9209  	JB   one_byte_match_emit_encodeBetterBlockAsm10B
9210  	CMPL SI, $0x00000100
9211  	JB   two_bytes_match_emit_encodeBetterBlockAsm10B
9212  	JB   three_bytes_match_emit_encodeBetterBlockAsm10B
9213  
9214  three_bytes_match_emit_encodeBetterBlockAsm10B:
9215  	MOVB $0xf4, (CX)
9216  	MOVW SI, 1(CX)
9217  	ADDQ $0x03, CX
9218  	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
9219  
9220  two_bytes_match_emit_encodeBetterBlockAsm10B:
9221  	MOVB $0xf0, (CX)
9222  	MOVB SI, 1(CX)
9223  	ADDQ $0x02, CX
9224  	CMPL SI, $0x40
9225  	JB   memmove_match_emit_encodeBetterBlockAsm10B
9226  	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
9227  
9228  one_byte_match_emit_encodeBetterBlockAsm10B:
9229  	SHLB $0x02, SI
9230  	MOVB SI, (CX)
9231  	ADDQ $0x01, CX
9232  
9233  memmove_match_emit_encodeBetterBlockAsm10B:
9234  	LEAQ (CX)(R9*1), SI
9235  
9236  	// genMemMoveShort
9237  	CMPQ R9, $0x04
9238  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
9239  	CMPQ R9, $0x08
9240  	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
9241  	CMPQ R9, $0x10
9242  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
9243  	CMPQ R9, $0x20
9244  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
9245  	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
9246  
9247  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
9248  	MOVL (R10), R11
9249  	MOVL R11, (CX)
9250  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
9251  
9252  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
9253  	MOVL (R10), R11
9254  	MOVL -4(R10)(R9*1), R10
9255  	MOVL R11, (CX)
9256  	MOVL R10, -4(CX)(R9*1)
9257  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
9258  
9259  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
9260  	MOVQ (R10), R11
9261  	MOVQ -8(R10)(R9*1), R10
9262  	MOVQ R11, (CX)
9263  	MOVQ R10, -8(CX)(R9*1)
9264  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
9265  
9266  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
9267  	MOVOU (R10), X0
9268  	MOVOU -16(R10)(R9*1), X1
9269  	MOVOU X0, (CX)
9270  	MOVOU X1, -16(CX)(R9*1)
9271  	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm10B
9272  
9273  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
9274  	MOVOU (R10), X0
9275  	MOVOU 16(R10), X1
9276  	MOVOU -32(R10)(R9*1), X2
9277  	MOVOU -16(R10)(R9*1), X3
9278  	MOVOU X0, (CX)
9279  	MOVOU X1, 16(CX)
9280  	MOVOU X2, -32(CX)(R9*1)
9281  	MOVOU X3, -16(CX)(R9*1)
9282  
9283  memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
9284  	MOVQ SI, CX
9285  	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm10B
9286  
9287  memmove_long_match_emit_encodeBetterBlockAsm10B:
9288  	LEAQ (CX)(R9*1), SI
9289  
9290  	// genMemMoveLong
9291  	MOVOU (R10), X0
9292  	MOVOU 16(R10), X1
9293  	MOVOU -32(R10)(R9*1), X2
9294  	MOVOU -16(R10)(R9*1), X3
9295  	MOVQ  R9, R13
9296  	SHRQ  $0x05, R13
9297  	MOVQ  CX, R11
9298  	ANDL  $0x0000001f, R11
9299  	MOVQ  $0x00000040, R14
9300  	SUBQ  R11, R14
9301  	DECQ  R13
9302  	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
9303  	LEAQ  -32(R10)(R14*1), R11
9304  	LEAQ  -32(CX)(R14*1), R15
9305  
9306  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
9307  	MOVOU (R11), X4
9308  	MOVOU 16(R11), X5
9309  	MOVOA X4, (R15)
9310  	MOVOA X5, 16(R15)
9311  	ADDQ  $0x20, R15
9312  	ADDQ  $0x20, R11
9313  	ADDQ  $0x20, R14
9314  	DECQ  R13
9315  	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
9316  
9317  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
9318  	MOVOU -32(R10)(R14*1), X4
9319  	MOVOU -16(R10)(R14*1), X5
9320  	MOVOA X4, -32(CX)(R14*1)
9321  	MOVOA X5, -16(CX)(R14*1)
9322  	ADDQ  $0x20, R14
9323  	CMPQ  R9, R14
9324  	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
9325  	MOVOU X0, (CX)
9326  	MOVOU X1, 16(CX)
9327  	MOVOU X2, -32(CX)(R9*1)
9328  	MOVOU X3, -16(CX)(R9*1)
9329  	MOVQ  SI, CX
9330  
9331  emit_literal_done_match_emit_encodeBetterBlockAsm10B:
9332  	ADDL R12, DX
9333  	ADDL $0x04, R12
9334  	MOVL DX, 12(SP)
9335  
9336  	// emitCopy
9337  	CMPL R12, $0x40
9338  	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
9339  	CMPL R8, $0x00000800
9340  	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm10B
9341  	MOVL $0x00000001, SI
9342  	LEAL 16(SI), SI
9343  	MOVB R8, 1(CX)
9344  	SHRL $0x08, R8
9345  	SHLL $0x05, R8
9346  	ORL  R8, SI
9347  	MOVB SI, (CX)
9348  	ADDQ $0x02, CX
9349  	SUBL $0x08, R12
9350  
9351  	// emitRepeat
9352  	LEAL -4(R12), R12
9353  	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
9354  	MOVL R12, SI
9355  	LEAL -4(R12), R12
9356  	CMPL SI, $0x08
9357  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
9358  	CMPL SI, $0x0c
9359  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
9360  	CMPL R8, $0x00000800
9361  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
9362  
9363  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
9364  	CMPL R12, $0x00000104
9365  	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
9366  	LEAL -256(R12), R12
9367  	MOVW $0x0019, (CX)
9368  	MOVW R12, 2(CX)
9369  	ADDQ $0x04, CX
9370  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9371  
9372  repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
9373  	LEAL -4(R12), R12
9374  	MOVW $0x0015, (CX)
9375  	MOVB R12, 2(CX)
9376  	ADDQ $0x03, CX
9377  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9378  
9379  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
9380  	SHLL $0x02, R12
9381  	ORL  $0x01, R12
9382  	MOVW R12, (CX)
9383  	ADDQ $0x02, CX
9384  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9385  
9386  repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
9387  	XORQ SI, SI
9388  	LEAL 1(SI)(R12*4), R12
9389  	MOVB R8, 1(CX)
9390  	SARL $0x08, R8
9391  	SHLL $0x05, R8
9392  	ORL  R8, R12
9393  	MOVB R12, (CX)
9394  	ADDQ $0x02, CX
9395  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9396  
9397  long_offset_short_match_nolit_encodeBetterBlockAsm10B:
9398  	MOVB $0xee, (CX)
9399  	MOVW R8, 1(CX)
9400  	LEAL -60(R12), R12
9401  	ADDQ $0x03, CX
9402  
9403  	// emitRepeat
9404  	MOVL R12, SI
9405  	LEAL -4(R12), R12
9406  	CMPL SI, $0x08
9407  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
9408  	CMPL SI, $0x0c
9409  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
9410  	CMPL R8, $0x00000800
9411  	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
9412  
9413  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
9414  	CMPL R12, $0x00000104
9415  	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
9416  	LEAL -256(R12), R12
9417  	MOVW $0x0019, (CX)
9418  	MOVW R12, 2(CX)
9419  	ADDQ $0x04, CX
9420  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9421  
9422  repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
9423  	LEAL -4(R12), R12
9424  	MOVW $0x0015, (CX)
9425  	MOVB R12, 2(CX)
9426  	ADDQ $0x03, CX
9427  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9428  
9429  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
9430  	SHLL $0x02, R12
9431  	ORL  $0x01, R12
9432  	MOVW R12, (CX)
9433  	ADDQ $0x02, CX
9434  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9435  
9436  repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
9437  	XORQ SI, SI
9438  	LEAL 1(SI)(R12*4), R12
9439  	MOVB R8, 1(CX)
9440  	SARL $0x08, R8
9441  	SHLL $0x05, R8
9442  	ORL  R8, R12
9443  	MOVB R12, (CX)
9444  	ADDQ $0x02, CX
9445  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9446  
9447  two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
9448  	MOVL R12, SI
9449  	SHLL $0x02, SI
9450  	CMPL R12, $0x0c
9451  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
9452  	CMPL R8, $0x00000800
9453  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
9454  	LEAL -15(SI), SI
9455  	MOVB R8, 1(CX)
9456  	SHRL $0x08, R8
9457  	SHLL $0x05, R8
9458  	ORL  R8, SI
9459  	MOVB SI, (CX)
9460  	ADDQ $0x02, CX
9461  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9462  
9463  emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
9464  	LEAL -2(SI), SI
9465  	MOVB SI, (CX)
9466  	MOVW R8, 1(CX)
9467  	ADDQ $0x03, CX
9468  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9469  
9470  match_is_repeat_encodeBetterBlockAsm10B:
9471  	MOVL 12(SP), SI
9472  	CMPL SI, DI
9473  	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
9474  	MOVL DI, R9
9475  	MOVL DI, 12(SP)
9476  	LEAQ (BX)(SI*1), R10
9477  	SUBL SI, R9
9478  	LEAL -1(R9), SI
9479  	CMPL SI, $0x3c
9480  	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm10B
9481  	CMPL SI, $0x00000100
9482  	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
9483  	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm10B
9484  
9485  three_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
9486  	MOVB $0xf4, (CX)
9487  	MOVW SI, 1(CX)
9488  	ADDQ $0x03, CX
9489  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
9490  
9491  two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
9492  	MOVB $0xf0, (CX)
9493  	MOVB SI, 1(CX)
9494  	ADDQ $0x02, CX
9495  	CMPL SI, $0x40
9496  	JB   memmove_match_emit_repeat_encodeBetterBlockAsm10B
9497  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
9498  
9499  one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
9500  	SHLB $0x02, SI
9501  	MOVB SI, (CX)
9502  	ADDQ $0x01, CX
9503  
9504  memmove_match_emit_repeat_encodeBetterBlockAsm10B:
9505  	LEAQ (CX)(R9*1), SI
9506  
9507  	// genMemMoveShort
9508  	CMPQ R9, $0x04
9509  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
9510  	CMPQ R9, $0x08
9511  	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
9512  	CMPQ R9, $0x10
9513  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
9514  	CMPQ R9, $0x20
9515  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
9516  	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
9517  
9518  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
9519  	MOVL (R10), R11
9520  	MOVL R11, (CX)
9521  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
9522  
9523  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
9524  	MOVL (R10), R11
9525  	MOVL -4(R10)(R9*1), R10
9526  	MOVL R11, (CX)
9527  	MOVL R10, -4(CX)(R9*1)
9528  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
9529  
9530  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
9531  	MOVQ (R10), R11
9532  	MOVQ -8(R10)(R9*1), R10
9533  	MOVQ R11, (CX)
9534  	MOVQ R10, -8(CX)(R9*1)
9535  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
9536  
9537  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
9538  	MOVOU (R10), X0
9539  	MOVOU -16(R10)(R9*1), X1
9540  	MOVOU X0, (CX)
9541  	MOVOU X1, -16(CX)(R9*1)
9542  	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
9543  
9544  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
9545  	MOVOU (R10), X0
9546  	MOVOU 16(R10), X1
9547  	MOVOU -32(R10)(R9*1), X2
9548  	MOVOU -16(R10)(R9*1), X3
9549  	MOVOU X0, (CX)
9550  	MOVOU X1, 16(CX)
9551  	MOVOU X2, -32(CX)(R9*1)
9552  	MOVOU X3, -16(CX)(R9*1)
9553  
9554  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
9555  	MOVQ SI, CX
9556  	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
9557  
9558  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
9559  	LEAQ (CX)(R9*1), SI
9560  
9561  	// genMemMoveLong
9562  	MOVOU (R10), X0
9563  	MOVOU 16(R10), X1
9564  	MOVOU -32(R10)(R9*1), X2
9565  	MOVOU -16(R10)(R9*1), X3
9566  	MOVQ  R9, R13
9567  	SHRQ  $0x05, R13
9568  	MOVQ  CX, R11
9569  	ANDL  $0x0000001f, R11
9570  	MOVQ  $0x00000040, R14
9571  	SUBQ  R11, R14
9572  	DECQ  R13
9573  	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
9574  	LEAQ  -32(R10)(R14*1), R11
9575  	LEAQ  -32(CX)(R14*1), R15
9576  
9577  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
9578  	MOVOU (R11), X4
9579  	MOVOU 16(R11), X5
9580  	MOVOA X4, (R15)
9581  	MOVOA X5, 16(R15)
9582  	ADDQ  $0x20, R15
9583  	ADDQ  $0x20, R11
9584  	ADDQ  $0x20, R14
9585  	DECQ  R13
9586  	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
9587  
9588  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
9589  	MOVOU -32(R10)(R14*1), X4
9590  	MOVOU -16(R10)(R14*1), X5
9591  	MOVOA X4, -32(CX)(R14*1)
9592  	MOVOA X5, -16(CX)(R14*1)
9593  	ADDQ  $0x20, R14
9594  	CMPQ  R9, R14
9595  	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
9596  	MOVOU X0, (CX)
9597  	MOVOU X1, 16(CX)
9598  	MOVOU X2, -32(CX)(R9*1)
9599  	MOVOU X3, -16(CX)(R9*1)
9600  	MOVQ  SI, CX
9601  
9602  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
9603  	ADDL R12, DX
9604  	ADDL $0x04, R12
9605  	MOVL DX, 12(SP)
9606  
9607  	// emitRepeat
9608  	MOVL R12, SI
9609  	LEAL -4(R12), R12
9610  	CMPL SI, $0x08
9611  	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
9612  	CMPL SI, $0x0c
9613  	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
9614  	CMPL R8, $0x00000800
9615  	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
9616  
9617  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
9618  	CMPL R12, $0x00000104
9619  	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
9620  	LEAL -256(R12), R12
9621  	MOVW $0x0019, (CX)
9622  	MOVW R12, 2(CX)
9623  	ADDQ $0x04, CX
9624  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9625  
9626  repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
9627  	LEAL -4(R12), R12
9628  	MOVW $0x0015, (CX)
9629  	MOVB R12, 2(CX)
9630  	ADDQ $0x03, CX
9631  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9632  
9633  repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
9634  	SHLL $0x02, R12
9635  	ORL  $0x01, R12
9636  	MOVW R12, (CX)
9637  	ADDQ $0x02, CX
9638  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
9639  
9640  repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
9641  	XORQ SI, SI
9642  	LEAL 1(SI)(R12*4), R12
9643  	MOVB R8, 1(CX)
9644  	SARL $0x08, R8
9645  	SHLL $0x05, R8
9646  	ORL  R8, R12
9647  	MOVB R12, (CX)
9648  	ADDQ $0x02, CX
9649  
9650  match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
9651  	CMPL DX, 8(SP)
9652  	JAE  emit_remainder_encodeBetterBlockAsm10B
9653  	CMPQ CX, (SP)
9654  	JB   match_nolit_dst_ok_encodeBetterBlockAsm10B
9655  	MOVQ $0x00000000, ret+56(FP)
9656  	RET
9657  
9658  match_nolit_dst_ok_encodeBetterBlockAsm10B:
9659  	MOVQ  $0x0000cf1bbcdcbf9b, SI
9660  	MOVQ  $0x9e3779b1, R8
9661  	LEAQ  1(DI), DI
9662  	LEAQ  -2(DX), R9
9663  	MOVQ  (BX)(DI*1), R10
9664  	MOVQ  1(BX)(DI*1), R11
9665  	MOVQ  (BX)(R9*1), R12
9666  	MOVQ  1(BX)(R9*1), R13
9667  	SHLQ  $0x10, R10
9668  	IMULQ SI, R10
9669  	SHRQ  $0x34, R10
9670  	SHLQ  $0x20, R11
9671  	IMULQ R8, R11
9672  	SHRQ  $0x36, R11
9673  	SHLQ  $0x10, R12
9674  	IMULQ SI, R12
9675  	SHRQ  $0x34, R12
9676  	SHLQ  $0x20, R13
9677  	IMULQ R8, R13
9678  	SHRQ  $0x36, R13
9679  	LEAQ  1(DI), R8
9680  	LEAQ  1(R9), R14
9681  	MOVL  DI, (AX)(R10*4)
9682  	MOVL  R9, (AX)(R12*4)
9683  	MOVL  R8, 16384(AX)(R11*4)
9684  	MOVL  R14, 16384(AX)(R13*4)
9685  	LEAQ  1(R9)(DI*1), R8
9686  	SHRQ  $0x01, R8
9687  	ADDQ  $0x01, DI
9688  	SUBQ  $0x01, R9
9689  
9690  index_loop_encodeBetterBlockAsm10B:
9691  	CMPQ  R8, R9
9692  	JAE   search_loop_encodeBetterBlockAsm10B
9693  	MOVQ  (BX)(DI*1), R10
9694  	MOVQ  (BX)(R8*1), R11
9695  	SHLQ  $0x10, R10
9696  	IMULQ SI, R10
9697  	SHRQ  $0x34, R10
9698  	SHLQ  $0x10, R11
9699  	IMULQ SI, R11
9700  	SHRQ  $0x34, R11
9701  	MOVL  DI, (AX)(R10*4)
9702  	MOVL  R8, (AX)(R11*4)
9703  	ADDQ  $0x02, DI
9704  	ADDQ  $0x02, R8
9705  	JMP   index_loop_encodeBetterBlockAsm10B
9706  
9707  emit_remainder_encodeBetterBlockAsm10B:
9708  	MOVQ src_len+32(FP), AX
9709  	SUBL 12(SP), AX
9710  	LEAQ 3(CX)(AX*1), AX
9711  	CMPQ AX, (SP)
9712  	JB   emit_remainder_ok_encodeBetterBlockAsm10B
9713  	MOVQ $0x00000000, ret+56(FP)
9714  	RET
9715  
9716  emit_remainder_ok_encodeBetterBlockAsm10B:
9717  	MOVQ src_len+32(FP), AX
9718  	MOVL 12(SP), DX
9719  	CMPL DX, AX
9720  	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
9721  	MOVL AX, SI
9722  	MOVL AX, 12(SP)
9723  	LEAQ (BX)(DX*1), AX
9724  	SUBL DX, SI
9725  	LEAL -1(SI), DX
9726  	CMPL DX, $0x3c
9727  	JB   one_byte_emit_remainder_encodeBetterBlockAsm10B
9728  	CMPL DX, $0x00000100
9729  	JB   two_bytes_emit_remainder_encodeBetterBlockAsm10B
9730  	JB   three_bytes_emit_remainder_encodeBetterBlockAsm10B
9731  
9732  three_bytes_emit_remainder_encodeBetterBlockAsm10B:
9733  	MOVB $0xf4, (CX)
9734  	MOVW DX, 1(CX)
9735  	ADDQ $0x03, CX
9736  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
9737  
9738  two_bytes_emit_remainder_encodeBetterBlockAsm10B:
9739  	MOVB $0xf0, (CX)
9740  	MOVB DL, 1(CX)
9741  	ADDQ $0x02, CX
9742  	CMPL DX, $0x40
9743  	JB   memmove_emit_remainder_encodeBetterBlockAsm10B
9744  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
9745  
9746  one_byte_emit_remainder_encodeBetterBlockAsm10B:
9747  	SHLB $0x02, DL
9748  	MOVB DL, (CX)
9749  	ADDQ $0x01, CX
9750  
9751  memmove_emit_remainder_encodeBetterBlockAsm10B:
9752  	LEAQ (CX)(SI*1), DX
9753  	MOVL SI, BX
9754  
9755  	// genMemMoveShort
9756  	CMPQ BX, $0x03
9757  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2
9758  	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3
9759  	CMPQ BX, $0x08
9760  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
9761  	CMPQ BX, $0x10
9762  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
9763  	CMPQ BX, $0x20
9764  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
9765  	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
9766  
9767  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2:
9768  	MOVB (AX), SI
9769  	MOVB -1(AX)(BX*1), AL
9770  	MOVB SI, (CX)
9771  	MOVB AL, -1(CX)(BX*1)
9772  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
9773  
9774  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3:
9775  	MOVW (AX), SI
9776  	MOVB 2(AX), AL
9777  	MOVW SI, (CX)
9778  	MOVB AL, 2(CX)
9779  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
9780  
9781  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
9782  	MOVL (AX), SI
9783  	MOVL -4(AX)(BX*1), AX
9784  	MOVL SI, (CX)
9785  	MOVL AX, -4(CX)(BX*1)
9786  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
9787  
9788  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
9789  	MOVQ (AX), SI
9790  	MOVQ -8(AX)(BX*1), AX
9791  	MOVQ SI, (CX)
9792  	MOVQ AX, -8(CX)(BX*1)
9793  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
9794  
9795  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
9796  	MOVOU (AX), X0
9797  	MOVOU -16(AX)(BX*1), X1
9798  	MOVOU X0, (CX)
9799  	MOVOU X1, -16(CX)(BX*1)
9800  	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
9801  
9802  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
9803  	MOVOU (AX), X0
9804  	MOVOU 16(AX), X1
9805  	MOVOU -32(AX)(BX*1), X2
9806  	MOVOU -16(AX)(BX*1), X3
9807  	MOVOU X0, (CX)
9808  	MOVOU X1, 16(CX)
9809  	MOVOU X2, -32(CX)(BX*1)
9810  	MOVOU X3, -16(CX)(BX*1)
9811  
9812  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
9813  	MOVQ DX, CX
9814  	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
9815  
9816  memmove_long_emit_remainder_encodeBetterBlockAsm10B:
9817  	LEAQ (CX)(SI*1), DX
9818  	MOVL SI, BX
9819  
9820  	// genMemMoveLong
9821  	MOVOU (AX), X0
9822  	MOVOU 16(AX), X1
9823  	MOVOU -32(AX)(BX*1), X2
9824  	MOVOU -16(AX)(BX*1), X3
9825  	MOVQ  BX, DI
9826  	SHRQ  $0x05, DI
9827  	MOVQ  CX, SI
9828  	ANDL  $0x0000001f, SI
9829  	MOVQ  $0x00000040, R8
9830  	SUBQ  SI, R8
9831  	DECQ  DI
9832  	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
9833  	LEAQ  -32(AX)(R8*1), SI
9834  	LEAQ  -32(CX)(R8*1), R9
9835  
9836  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
9837  	MOVOU (SI), X4
9838  	MOVOU 16(SI), X5
9839  	MOVOA X4, (R9)
9840  	MOVOA X5, 16(R9)
9841  	ADDQ  $0x20, R9
9842  	ADDQ  $0x20, SI
9843  	ADDQ  $0x20, R8
9844  	DECQ  DI
9845  	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
9846  
9847  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
9848  	MOVOU -32(AX)(R8*1), X4
9849  	MOVOU -16(AX)(R8*1), X5
9850  	MOVOA X4, -32(CX)(R8*1)
9851  	MOVOA X5, -16(CX)(R8*1)
9852  	ADDQ  $0x20, R8
9853  	CMPQ  BX, R8
9854  	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
9855  	MOVOU X0, (CX)
9856  	MOVOU X1, 16(CX)
9857  	MOVOU X2, -32(CX)(BX*1)
9858  	MOVOU X3, -16(CX)(BX*1)
9859  	MOVQ  DX, CX
9860  
9861  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
9862  	MOVQ dst_base+0(FP), AX
9863  	SUBQ AX, CX
9864  	MOVQ CX, ret+56(FP)
9865  	RET
9866  
9867  // func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
9868  // Requires: BMI, SSE2
9869  TEXT ·encodeBetterBlockAsm8B(SB), $24-64
9870  	MOVQ tmp+48(FP), AX
9871  	MOVQ dst_base+0(FP), CX
9872  	MOVQ $0x00000028, DX
9873  	MOVQ AX, BX
9874  	PXOR X0, X0
9875  
9876  zero_loop_encodeBetterBlockAsm8B:
9877  	MOVOU X0, (BX)
9878  	MOVOU X0, 16(BX)
9879  	MOVOU X0, 32(BX)
9880  	MOVOU X0, 48(BX)
9881  	MOVOU X0, 64(BX)
9882  	MOVOU X0, 80(BX)
9883  	MOVOU X0, 96(BX)
9884  	MOVOU X0, 112(BX)
9885  	ADDQ  $0x80, BX
9886  	DECQ  DX
9887  	JNZ   zero_loop_encodeBetterBlockAsm8B
9888  	MOVL  $0x00000000, 12(SP)
9889  	MOVQ  src_len+32(FP), DX
9890  	LEAQ  -6(DX), BX
9891  	LEAQ  -8(DX), SI
9892  	MOVL  SI, 8(SP)
9893  	SHRQ  $0x05, DX
9894  	SUBL  DX, BX
9895  	LEAQ  (CX)(BX*1), BX
9896  	MOVQ  BX, (SP)
9897  	MOVL  $0x00000001, DX
9898  	MOVL  $0x00000000, 16(SP)
9899  	MOVQ  src_base+24(FP), BX
9900  
9901  search_loop_encodeBetterBlockAsm8B:
9902  	MOVL  DX, SI
9903  	SUBL  12(SP), SI
9904  	SHRL  $0x04, SI
9905  	LEAL  1(DX)(SI*1), SI
9906  	CMPL  SI, 8(SP)
9907  	JAE   emit_remainder_encodeBetterBlockAsm8B
9908  	MOVQ  (BX)(DX*1), DI
9909  	MOVL  SI, 20(SP)
9910  	MOVQ  $0x0000cf1bbcdcbf9b, R9
9911  	MOVQ  $0x9e3779b1, SI
9912  	MOVQ  DI, R10
9913  	MOVQ  DI, R11
9914  	SHLQ  $0x10, R10
9915  	IMULQ R9, R10
9916  	SHRQ  $0x36, R10
9917  	SHLQ  $0x20, R11
9918  	IMULQ SI, R11
9919  	SHRQ  $0x38, R11
9920  	MOVL  (AX)(R10*4), SI
9921  	MOVL  4096(AX)(R11*4), R8
9922  	MOVL  DX, (AX)(R10*4)
9923  	MOVL  DX, 4096(AX)(R11*4)
9924  	MOVQ  (BX)(SI*1), R10
9925  	MOVQ  (BX)(R8*1), R11
9926  	CMPQ  R10, DI
9927  	JEQ   candidate_match_encodeBetterBlockAsm8B
9928  	CMPQ  R11, DI
9929  	JNE   no_short_found_encodeBetterBlockAsm8B
9930  	MOVL  R8, SI
9931  	JMP   candidate_match_encodeBetterBlockAsm8B
9932  
9933  no_short_found_encodeBetterBlockAsm8B:
9934  	CMPL R10, DI
9935  	JEQ  candidate_match_encodeBetterBlockAsm8B
9936  	CMPL R11, DI
9937  	JEQ  candidateS_match_encodeBetterBlockAsm8B
9938  	MOVL 20(SP), DX
9939  	JMP  search_loop_encodeBetterBlockAsm8B
9940  
9941  candidateS_match_encodeBetterBlockAsm8B:
9942  	SHRQ  $0x08, DI
9943  	MOVQ  DI, R10
9944  	SHLQ  $0x10, R10
9945  	IMULQ R9, R10
9946  	SHRQ  $0x36, R10
9947  	MOVL  (AX)(R10*4), SI
9948  	INCL  DX
9949  	MOVL  DX, (AX)(R10*4)
9950  	CMPL  (BX)(SI*1), DI
9951  	JEQ   candidate_match_encodeBetterBlockAsm8B
9952  	DECL  DX
9953  	MOVL  R8, SI
9954  
9955  candidate_match_encodeBetterBlockAsm8B:
9956  	MOVL  12(SP), DI
9957  	TESTL SI, SI
9958  	JZ    match_extend_back_end_encodeBetterBlockAsm8B
9959  
9960  match_extend_back_loop_encodeBetterBlockAsm8B:
9961  	CMPL DX, DI
9962  	JBE  match_extend_back_end_encodeBetterBlockAsm8B
9963  	MOVB -1(BX)(SI*1), R8
9964  	MOVB -1(BX)(DX*1), R9
9965  	CMPB R8, R9
9966  	JNE  match_extend_back_end_encodeBetterBlockAsm8B
9967  	LEAL -1(DX), DX
9968  	DECL SI
9969  	JZ   match_extend_back_end_encodeBetterBlockAsm8B
9970  	JMP  match_extend_back_loop_encodeBetterBlockAsm8B
9971  
9972  match_extend_back_end_encodeBetterBlockAsm8B:
9973  	MOVL DX, DI
9974  	SUBL 12(SP), DI
9975  	LEAQ 3(CX)(DI*1), DI
9976  	CMPQ DI, (SP)
9977  	JB   match_dst_size_check_encodeBetterBlockAsm8B
9978  	MOVQ $0x00000000, ret+56(FP)
9979  	RET
9980  
9981  match_dst_size_check_encodeBetterBlockAsm8B:
9982  	MOVL DX, DI
9983  	ADDL $0x04, DX
9984  	ADDL $0x04, SI
9985  	MOVQ src_len+32(FP), R8
9986  	SUBL DX, R8
9987  	LEAQ (BX)(DX*1), R9
9988  	LEAQ (BX)(SI*1), R10
9989  
9990  	// matchLen
9991  	XORL R12, R12
9992  
9993  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B:
9994  	CMPL R8, $0x10
9995  	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm8B
9996  	MOVQ (R9)(R12*1), R11
9997  	MOVQ 8(R9)(R12*1), R13
9998  	XORQ (R10)(R12*1), R11
9999  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
10000  	XORQ 8(R10)(R12*1), R13
10001  	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B
10002  	LEAL -16(R8), R8
10003  	LEAL 16(R12), R12
10004  	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B
10005  
10006  matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B:
10007  #ifdef GOAMD64_v3
10008  	TZCNTQ R13, R13
10009  
10010  #else
10011  	BSFQ R13, R13
10012  
10013  #endif
10014  	SARQ $0x03, R13
10015  	LEAL 8(R12)(R13*1), R12
10016  	JMP  match_nolit_end_encodeBetterBlockAsm8B
10017  
10018  matchlen_match8_match_nolit_encodeBetterBlockAsm8B:
10019  	CMPL R8, $0x08
10020  	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm8B
10021  	MOVQ (R9)(R12*1), R11
10022  	XORQ (R10)(R12*1), R11
10023  	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
10024  	LEAL -8(R8), R8
10025  	LEAL 8(R12), R12
10026  	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm8B
10027  
10028  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B:
10029  #ifdef GOAMD64_v3
10030  	TZCNTQ R11, R11
10031  
10032  #else
10033  	BSFQ R11, R11
10034  
10035  #endif
10036  	SARQ $0x03, R11
10037  	LEAL (R12)(R11*1), R12
10038  	JMP  match_nolit_end_encodeBetterBlockAsm8B
10039  
10040  matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
10041  	CMPL R8, $0x04
10042  	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm8B
10043  	MOVL (R9)(R12*1), R11
10044  	CMPL (R10)(R12*1), R11
10045  	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm8B
10046  	LEAL -4(R8), R8
10047  	LEAL 4(R12), R12
10048  
10049  matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
10050  	CMPL R8, $0x01
10051  	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm8B
10052  	JB   match_nolit_end_encodeBetterBlockAsm8B
10053  	MOVW (R9)(R12*1), R11
10054  	CMPW (R10)(R12*1), R11
10055  	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm8B
10056  	LEAL 2(R12), R12
10057  	SUBL $0x02, R8
10058  	JZ   match_nolit_end_encodeBetterBlockAsm8B
10059  
10060  matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
10061  	MOVB (R9)(R12*1), R11
10062  	CMPB (R10)(R12*1), R11
10063  	JNE  match_nolit_end_encodeBetterBlockAsm8B
10064  	LEAL 1(R12), R12
10065  
10066  match_nolit_end_encodeBetterBlockAsm8B:
10067  	MOVL DX, R8
10068  	SUBL SI, R8
10069  
10070  	// Check if repeat
10071  	CMPL 16(SP), R8
10072  	JEQ  match_is_repeat_encodeBetterBlockAsm8B
10073  	MOVL R8, 16(SP)
10074  	MOVL 12(SP), SI
10075  	CMPL SI, DI
10076  	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm8B
10077  	MOVL DI, R9
10078  	MOVL DI, 12(SP)
10079  	LEAQ (BX)(SI*1), R10
10080  	SUBL SI, R9
10081  	LEAL -1(R9), SI
10082  	CMPL SI, $0x3c
10083  	JB   one_byte_match_emit_encodeBetterBlockAsm8B
10084  	CMPL SI, $0x00000100
10085  	JB   two_bytes_match_emit_encodeBetterBlockAsm8B
10086  	JB   three_bytes_match_emit_encodeBetterBlockAsm8B
10087  
10088  three_bytes_match_emit_encodeBetterBlockAsm8B:
10089  	MOVB $0xf4, (CX)
10090  	MOVW SI, 1(CX)
10091  	ADDQ $0x03, CX
10092  	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
10093  
10094  two_bytes_match_emit_encodeBetterBlockAsm8B:
10095  	MOVB $0xf0, (CX)
10096  	MOVB SI, 1(CX)
10097  	ADDQ $0x02, CX
10098  	CMPL SI, $0x40
10099  	JB   memmove_match_emit_encodeBetterBlockAsm8B
10100  	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
10101  
10102  one_byte_match_emit_encodeBetterBlockAsm8B:
10103  	SHLB $0x02, SI
10104  	MOVB SI, (CX)
10105  	ADDQ $0x01, CX
10106  
10107  memmove_match_emit_encodeBetterBlockAsm8B:
10108  	LEAQ (CX)(R9*1), SI
10109  
10110  	// genMemMoveShort
10111  	CMPQ R9, $0x04
10112  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
10113  	CMPQ R9, $0x08
10114  	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
10115  	CMPQ R9, $0x10
10116  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
10117  	CMPQ R9, $0x20
10118  	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
10119  	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
10120  
10121  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
10122  	MOVL (R10), R11
10123  	MOVL R11, (CX)
10124  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
10125  
10126  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
10127  	MOVL (R10), R11
10128  	MOVL -4(R10)(R9*1), R10
10129  	MOVL R11, (CX)
10130  	MOVL R10, -4(CX)(R9*1)
10131  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
10132  
10133  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
10134  	MOVQ (R10), R11
10135  	MOVQ -8(R10)(R9*1), R10
10136  	MOVQ R11, (CX)
10137  	MOVQ R10, -8(CX)(R9*1)
10138  	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
10139  
10140  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
10141  	MOVOU (R10), X0
10142  	MOVOU -16(R10)(R9*1), X1
10143  	MOVOU X0, (CX)
10144  	MOVOU X1, -16(CX)(R9*1)
10145  	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm8B
10146  
10147  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
10148  	MOVOU (R10), X0
10149  	MOVOU 16(R10), X1
10150  	MOVOU -32(R10)(R9*1), X2
10151  	MOVOU -16(R10)(R9*1), X3
10152  	MOVOU X0, (CX)
10153  	MOVOU X1, 16(CX)
10154  	MOVOU X2, -32(CX)(R9*1)
10155  	MOVOU X3, -16(CX)(R9*1)
10156  
10157  memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
10158  	MOVQ SI, CX
10159  	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm8B
10160  
10161  memmove_long_match_emit_encodeBetterBlockAsm8B:
10162  	LEAQ (CX)(R9*1), SI
10163  
10164  	// genMemMoveLong
10165  	MOVOU (R10), X0
10166  	MOVOU 16(R10), X1
10167  	MOVOU -32(R10)(R9*1), X2
10168  	MOVOU -16(R10)(R9*1), X3
10169  	MOVQ  R9, R13
10170  	SHRQ  $0x05, R13
10171  	MOVQ  CX, R11
10172  	ANDL  $0x0000001f, R11
10173  	MOVQ  $0x00000040, R14
10174  	SUBQ  R11, R14
10175  	DECQ  R13
10176  	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
10177  	LEAQ  -32(R10)(R14*1), R11
10178  	LEAQ  -32(CX)(R14*1), R15
10179  
10180  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
10181  	MOVOU (R11), X4
10182  	MOVOU 16(R11), X5
10183  	MOVOA X4, (R15)
10184  	MOVOA X5, 16(R15)
10185  	ADDQ  $0x20, R15
10186  	ADDQ  $0x20, R11
10187  	ADDQ  $0x20, R14
10188  	DECQ  R13
10189  	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
10190  
10191  emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
10192  	MOVOU -32(R10)(R14*1), X4
10193  	MOVOU -16(R10)(R14*1), X5
10194  	MOVOA X4, -32(CX)(R14*1)
10195  	MOVOA X5, -16(CX)(R14*1)
10196  	ADDQ  $0x20, R14
10197  	CMPQ  R9, R14
10198  	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
10199  	MOVOU X0, (CX)
10200  	MOVOU X1, 16(CX)
10201  	MOVOU X2, -32(CX)(R9*1)
10202  	MOVOU X3, -16(CX)(R9*1)
10203  	MOVQ  SI, CX
10204  
10205  emit_literal_done_match_emit_encodeBetterBlockAsm8B: // literals are written; now emit the copy for the match
10206  	ADDL R12, DX // DX += R12 (R12 is set before this view)
10207  	ADDL $0x04, R12 // R12 += 4: the length handed to emitCopy below
10208  	MOVL DX, 12(SP) // pending-literal start = DX
10209  
10210  	// emitCopy: R12 = length, R8 = offset, CX = output pointer
10211  	CMPL R12, $0x40
10212  	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B // length <= 64: one copy op is enough
10213  	CMPL R8, $0x00000800
10214  	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm8B // offset >= 2048
10215  	MOVL $0x00000001, SI
10216  	LEAL 16(SI), SI // SI = 17 = 1 | (8-4)<<2: 2-byte copy op with length 8
10217  	MOVB R8, 1(CX) // low 8 bits of the offset
10218  	SHRL $0x08, R8
10219  	SHLL $0x05, R8
10220  	ORL  R8, SI // offset bits 8 and up go into tag bits 5..7
10221  	MOVB SI, (CX)
10222  	ADDQ $0x02, CX
10223  	SUBL $0x08, R12 // 8 bytes of the length are covered by the op just written
10224  
10225  	// emitRepeat: encode the remaining length (presumably as an S2 repeat of the same offset -- confirm against the generator)
10226  	LEAL -4(R12), R12
10227  	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
10228  	MOVL R12, SI // unreachable: this and the following lines up to the next label follow an unconditional JMP and carry no label
10229  	LEAL -4(R12), R12
10230  	CMPL SI, $0x08
10231  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
10232  	CMPL SI, $0x0c
10233  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
10234  
10235  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
10236  	CMPL R12, $0x00000104
10237  	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b // R12 < 260: 3-byte form
10238  	LEAL -256(R12), R12
10239  	MOVW $0x0019, (CX) // bytes 0x19, 0x00
10240  	MOVW R12, 2(CX) // 16-bit (R12 - 256)
10241  	ADDQ $0x04, CX
10242  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10243  
10244  repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
10245  	LEAL -4(R12), R12
10246  	MOVW $0x0015, (CX) // bytes 0x15, 0x00
10247  	MOVB R12, 2(CX) // 8-bit (R12 - 4)
10248  	ADDQ $0x03, CX
10249  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10250  
10251  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: // within this view the only branch to this label sits in the unreachable run above
10252  	SHLL $0x02, R12
10253  	ORL  $0x01, R12
10254  	MOVW R12, (CX)
10255  	ADDQ $0x02, CX
10256  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10257  	XORQ SI, SI // unreachable: from here to the end of this run follows an unconditional JMP and carries no label
10258  	LEAL 1(SI)(R12*4), R12
10259  	MOVB R8, 1(CX)
10260  	SARL $0x08, R8
10261  	SHLL $0x05, R8
10262  	ORL  R8, R12
10263  	MOVB R12, (CX)
10264  	ADDQ $0x02, CX
10265  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10266  
10267  long_offset_short_match_nolit_encodeBetterBlockAsm8B:
10268  	MOVB $0xee, (CX)
10269  	MOVW R8, 1(CX)
10270  	LEAL -60(R12), R12
10271  	ADDQ $0x03, CX
10272  
10273  	// emitRepeat
10274  	MOVL R12, SI
10275  	LEAL -4(R12), R12
10276  	CMPL SI, $0x08
10277  	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
10278  	CMPL SI, $0x0c
10279  	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
10280  
10281  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
10282  	CMPL R12, $0x00000104
10283  	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
10284  	LEAL -256(R12), R12
10285  	MOVW $0x0019, (CX)
10286  	MOVW R12, 2(CX)
10287  	ADDQ $0x04, CX
10288  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10289  
10290  repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
10291  	LEAL -4(R12), R12
10292  	MOVW $0x0015, (CX)
10293  	MOVB R12, 2(CX)
10294  	ADDQ $0x03, CX
10295  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10296  
10297  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
10298  	SHLL $0x02, R12
10299  	ORL  $0x01, R12
10300  	MOVW R12, (CX)
10301  	ADDQ $0x02, CX
10302  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10303  	XORQ SI, SI
10304  	LEAL 1(SI)(R12*4), R12
10305  	MOVB R8, 1(CX)
10306  	SARL $0x08, R8
10307  	SHLL $0x05, R8
10308  	ORL  R8, R12
10309  	MOVB R12, (CX)
10310  	ADDQ $0x02, CX
10311  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10312  
10313  two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: // single copy op: R12 = length (<= 64 on the visible path), R8 = offset
10314  	MOVL R12, SI
10315  	SHLL $0x02, SI // SI = length*4
10316  	CMPL R12, $0x0c
10317  	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm8B // length >= 12: use the 3-byte form
10318  	LEAL -15(SI), SI // SI = (length-4)<<2 | 1; NOTE(review): no offset range test on this path -- presumably offsets are bounded in the 8B variant, confirm against the generator
10319  	MOVB R8, 1(CX) // low 8 bits of the offset
10320  	SHRL $0x08, R8
10321  	SHLL $0x05, R8
10322  	ORL  R8, SI // offset bits 8 and up go into tag bits 5..7
10323  	MOVB SI, (CX)
10324  	ADDQ $0x02, CX
10325  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10326  
10327  emit_copy_three_match_nolit_encodeBetterBlockAsm8B: // 3-byte copy op: tag + 16-bit offset
10328  	LEAL -2(SI), SI // SI = length*4 - 2 = (length-1)<<2 | 2
10329  	MOVB SI, (CX)
10330  	MOVW R8, 1(CX) // low 16 bits of the offset
10331  	ADDQ $0x03, CX
10332  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10333  
10334  match_is_repeat_encodeBetterBlockAsm8B:
10335  	MOVL 12(SP), SI
10336  	CMPL SI, DI
10337  	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
10338  	MOVL DI, R8
10339  	MOVL DI, 12(SP)
10340  	LEAQ (BX)(SI*1), R9
10341  	SUBL SI, R8
10342  	LEAL -1(R8), SI
10343  	CMPL SI, $0x3c
10344  	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm8B
10345  	CMPL SI, $0x00000100
10346  	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
10347  	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm8B
10348  
10349  three_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
10350  	MOVB $0xf4, (CX)
10351  	MOVW SI, 1(CX)
10352  	ADDQ $0x03, CX
10353  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
10354  
10355  two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
10356  	MOVB $0xf0, (CX)
10357  	MOVB SI, 1(CX)
10358  	ADDQ $0x02, CX
10359  	CMPL SI, $0x40
10360  	JB   memmove_match_emit_repeat_encodeBetterBlockAsm8B
10361  	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
10362  
10363  one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
10364  	SHLB $0x02, SI
10365  	MOVB SI, (CX)
10366  	ADDQ $0x01, CX
10367  
10368  memmove_match_emit_repeat_encodeBetterBlockAsm8B:
10369  	LEAQ (CX)(R8*1), SI
10370  
10371  	// genMemMoveShort
10372  	CMPQ R8, $0x04
10373  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
10374  	CMPQ R8, $0x08
10375  	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
10376  	CMPQ R8, $0x10
10377  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
10378  	CMPQ R8, $0x20
10379  	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
10380  	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
10381  
10382  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
10383  	MOVL (R9), R10
10384  	MOVL R10, (CX)
10385  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
10386  
10387  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
10388  	MOVL (R9), R10
10389  	MOVL -4(R9)(R8*1), R9
10390  	MOVL R10, (CX)
10391  	MOVL R9, -4(CX)(R8*1)
10392  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
10393  
10394  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
10395  	MOVQ (R9), R10
10396  	MOVQ -8(R9)(R8*1), R9
10397  	MOVQ R10, (CX)
10398  	MOVQ R9, -8(CX)(R8*1)
10399  	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
10400  
10401  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
10402  	MOVOU (R9), X0
10403  	MOVOU -16(R9)(R8*1), X1
10404  	MOVOU X0, (CX)
10405  	MOVOU X1, -16(CX)(R8*1)
10406  	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
10407  
10408  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
10409  	MOVOU (R9), X0
10410  	MOVOU 16(R9), X1
10411  	MOVOU -32(R9)(R8*1), X2
10412  	MOVOU -16(R9)(R8*1), X3
10413  	MOVOU X0, (CX)
10414  	MOVOU X1, 16(CX)
10415  	MOVOU X2, -32(CX)(R8*1)
10416  	MOVOU X3, -16(CX)(R8*1)
10417  
10418  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
10419  	MOVQ SI, CX
10420  	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
10421  
10422  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
10423  	LEAQ (CX)(R8*1), SI
10424  
10425  	// genMemMoveLong
10426  	MOVOU (R9), X0
10427  	MOVOU 16(R9), X1
10428  	MOVOU -32(R9)(R8*1), X2
10429  	MOVOU -16(R9)(R8*1), X3
10430  	MOVQ  R8, R11
10431  	SHRQ  $0x05, R11
10432  	MOVQ  CX, R10
10433  	ANDL  $0x0000001f, R10
10434  	MOVQ  $0x00000040, R13
10435  	SUBQ  R10, R13
10436  	DECQ  R11
10437  	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
10438  	LEAQ  -32(R9)(R13*1), R10
10439  	LEAQ  -32(CX)(R13*1), R14
10440  
10441  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
10442  	MOVOU (R10), X4
10443  	MOVOU 16(R10), X5
10444  	MOVOA X4, (R14)
10445  	MOVOA X5, 16(R14)
10446  	ADDQ  $0x20, R14
10447  	ADDQ  $0x20, R10
10448  	ADDQ  $0x20, R13
10449  	DECQ  R11
10450  	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
10451  
10452  emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
10453  	MOVOU -32(R9)(R13*1), X4
10454  	MOVOU -16(R9)(R13*1), X5
10455  	MOVOA X4, -32(CX)(R13*1)
10456  	MOVOA X5, -16(CX)(R13*1)
10457  	ADDQ  $0x20, R13
10458  	CMPQ  R8, R13
10459  	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
10460  	MOVOU X0, (CX)
10461  	MOVOU X1, 16(CX)
10462  	MOVOU X2, -32(CX)(R8*1)
10463  	MOVOU X3, -16(CX)(R8*1)
10464  	MOVQ  SI, CX
10465  
10466  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
10467  	ADDL R12, DX
10468  	ADDL $0x04, R12
10469  	MOVL DX, 12(SP)
10470  
10471  	// emitRepeat
10472  	MOVL R12, SI
10473  	LEAL -4(R12), R12
10474  	CMPL SI, $0x08
10475  	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
10476  	CMPL SI, $0x0c
10477  	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B
10478  
10479  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
10480  	CMPL R12, $0x00000104
10481  	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
10482  	LEAL -256(R12), R12
10483  	MOVW $0x0019, (CX)
10484  	MOVW R12, 2(CX)
10485  	ADDQ $0x04, CX
10486  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10487  
10488  repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
10489  	LEAL -4(R12), R12
10490  	MOVW $0x0015, (CX)
10491  	MOVB R12, 2(CX)
10492  	ADDQ $0x03, CX
10493  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10494  
10495  repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
10496  	SHLL $0x02, R12
10497  	ORL  $0x01, R12
10498  	MOVW R12, (CX)
10499  	ADDQ $0x02, CX
10500  	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
10501  	XORQ SI, SI
10502  	LEAL 1(SI)(R12*4), R12
10503  	MOVB R8, 1(CX)
10504  	SARL $0x08, R8
10505  	SHLL $0x05, R8
10506  	ORL  R8, R12
10507  	MOVB R12, (CX)
10508  	ADDQ $0x02, CX
10509  
10510  match_nolit_emitcopy_end_encodeBetterBlockAsm8B: // common exit of every copy/repeat emitter above
10511  	CMPL DX, 8(SP)
10512  	JAE  emit_remainder_encodeBetterBlockAsm8B // DX reached the limit kept in 8(SP): flush the tail and finish
10513  	CMPQ CX, (SP)
10514  	JB   match_nolit_dst_ok_encodeBetterBlockAsm8B // output pointer still below the limit kept in (SP)
10515  	MOVQ $0x00000000, ret+56(FP) // output limit reached: return 0
10516  	RET
10517  
10518  match_nolit_dst_ok_encodeBetterBlockAsm8B: // record positions near both ends of the match in the two tables at AX and AX+4096
10519  	MOVQ  $0x0000cf1bbcdcbf9b, SI // multiplier for the 10-bit hash
10520  	MOVQ  $0x9e3779b1, R8 // multiplier for the 8-bit hash
10521  	LEAQ  1(DI), DI // DI = DI + 1
10522  	LEAQ  -2(DX), R9 // R9 = DX - 2
10523  	MOVQ  (BX)(DI*1), R10 // 8 bytes at DI
10524  	MOVQ  1(BX)(DI*1), R11 // 8 bytes at DI+1
10525  	MOVQ  (BX)(R9*1), R12 // 8 bytes at R9
10526  	MOVQ  1(BX)(R9*1), R13 // 8 bytes at R9+1
10527  	SHLQ  $0x10, R10 // drop the top 2 bytes: hash covers the low 6 bytes
10528  	IMULQ SI, R10
10529  	SHRQ  $0x36, R10 // keep the top 10 bits
10530  	SHLQ  $0x20, R11 // drop the top 4 bytes: hash covers the low 4 bytes
10531  	IMULQ R8, R11
10532  	SHRQ  $0x38, R11 // keep the top 8 bits
10533  	SHLQ  $0x10, R12
10534  	IMULQ SI, R12
10535  	SHRQ  $0x36, R12 // 10-bit hash at R9
10536  	SHLQ  $0x20, R13
10537  	IMULQ R8, R13
10538  	SHRQ  $0x38, R13 // 8-bit hash at R9+1
10539  	LEAQ  1(DI), R8
10540  	LEAQ  1(R9), R14
10541  	MOVL  DI, (AX)(R10*4) // first table (10-bit index) <- DI
10542  	MOVL  R9, (AX)(R12*4) // first table <- R9
10543  	MOVL  R8, 4096(AX)(R11*4) // second table (at AX+4096, 8-bit index) <- DI+1
10544  	MOVL  R14, 4096(AX)(R13*4) // second table <- R9+1
10545  	LEAQ  1(R9)(DI*1), R8
10546  	SHRQ  $0x01, R8 // R8 = (DI + R9 + 1) / 2, midpoint of the two positions
10547  	ADDQ  $0x01, DI
10548  	SUBQ  $0x01, R9
10549  
10550  index_loop_encodeBetterBlockAsm8B: // two cursors (DI and R8) advance by 2, each storing its position in the first table
10551  	CMPQ  R8, R9
10552  	JAE   search_loop_encodeBetterBlockAsm8B // R8 reached R9: indexing done, resume searching
10553  	MOVQ  (BX)(DI*1), R10 // 8 bytes at DI
10554  	MOVQ  (BX)(R8*1), R11 // 8 bytes at R8
10555  	SHLQ  $0x10, R10
10556  	IMULQ SI, R10
10557  	SHRQ  $0x36, R10 // 10-bit hash of the low 6 bytes (SI still holds the multiplier)
10558  	SHLQ  $0x10, R11
10559  	IMULQ SI, R11
10560  	SHRQ  $0x36, R11
10561  	MOVL  DI, (AX)(R10*4)
10562  	MOVL  R8, (AX)(R11*4)
10563  	ADDQ  $0x02, DI
10564  	ADDQ  $0x02, R8
10565  	JMP   index_loop_encodeBetterBlockAsm8B
10566  
10567  emit_remainder_encodeBetterBlockAsm8B: // check that the trailing literals fit before writing them
10568  	MOVQ src_len+32(FP), AX
10569  	SUBL 12(SP), AX // AX = src_len - pending-literal start = bytes still to emit
10570  	LEAQ 3(CX)(AX*1), AX // AX = CX + remaining + 3; 3 matches the largest literal header written below (tag + 16-bit length)
10571  	CMPQ AX, (SP)
10572  	JB   emit_remainder_ok_encodeBetterBlockAsm8B
10573  	MOVQ $0x00000000, ret+56(FP) // would reach the output limit kept in (SP): return 0
10574  	RET
10575  
10576  emit_remainder_ok_encodeBetterBlockAsm8B:
10577  	MOVQ src_len+32(FP), AX
10578  	MOVL 12(SP), DX
10579  	CMPL DX, AX
10580  	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
10581  	MOVL AX, SI
10582  	MOVL AX, 12(SP)
10583  	LEAQ (BX)(DX*1), AX
10584  	SUBL DX, SI
10585  	LEAL -1(SI), DX
10586  	CMPL DX, $0x3c
10587  	JB   one_byte_emit_remainder_encodeBetterBlockAsm8B
10588  	CMPL DX, $0x00000100
10589  	JB   two_bytes_emit_remainder_encodeBetterBlockAsm8B
10590  	JB   three_bytes_emit_remainder_encodeBetterBlockAsm8B
10591  
10592  three_bytes_emit_remainder_encodeBetterBlockAsm8B:
10593  	MOVB $0xf4, (CX)
10594  	MOVW DX, 1(CX)
10595  	ADDQ $0x03, CX
10596  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
10597  
10598  two_bytes_emit_remainder_encodeBetterBlockAsm8B:
10599  	MOVB $0xf0, (CX)
10600  	MOVB DL, 1(CX)
10601  	ADDQ $0x02, CX
10602  	CMPL DX, $0x40
10603  	JB   memmove_emit_remainder_encodeBetterBlockAsm8B
10604  	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
10605  
10606  one_byte_emit_remainder_encodeBetterBlockAsm8B:
10607  	SHLB $0x02, DL
10608  	MOVB DL, (CX)
10609  	ADDQ $0x01, CX
10610  
10611  memmove_emit_remainder_encodeBetterBlockAsm8B:
10612  	LEAQ (CX)(SI*1), DX
10613  	MOVL SI, BX
10614  
10615  	// genMemMoveShort
10616  	CMPQ BX, $0x03
10617  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2
10618  	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3
10619  	CMPQ BX, $0x08
10620  	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
10621  	CMPQ BX, $0x10
10622  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
10623  	CMPQ BX, $0x20
10624  	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
10625  	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
10626  
10627  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
10628  	MOVB (AX), SI
10629  	MOVB -1(AX)(BX*1), AL
10630  	MOVB SI, (CX)
10631  	MOVB AL, -1(CX)(BX*1)
10632  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
10633  
10634  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
10635  	MOVW (AX), SI
10636  	MOVB 2(AX), AL
10637  	MOVW SI, (CX)
10638  	MOVB AL, 2(CX)
10639  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
10640  
10641  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
10642  	MOVL (AX), SI
10643  	MOVL -4(AX)(BX*1), AX
10644  	MOVL SI, (CX)
10645  	MOVL AX, -4(CX)(BX*1)
10646  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
10647  
10648  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
10649  	MOVQ (AX), SI
10650  	MOVQ -8(AX)(BX*1), AX
10651  	MOVQ SI, (CX)
10652  	MOVQ AX, -8(CX)(BX*1)
10653  	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
10654  
10655  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
10656  	MOVOU (AX), X0
10657  	MOVOU -16(AX)(BX*1), X1
10658  	MOVOU X0, (CX)
10659  	MOVOU X1, -16(CX)(BX*1)
10660  	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
10661  
10662  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
10663  	MOVOU (AX), X0
10664  	MOVOU 16(AX), X1
10665  	MOVOU -32(AX)(BX*1), X2
10666  	MOVOU -16(AX)(BX*1), X3
10667  	MOVOU X0, (CX)
10668  	MOVOU X1, 16(CX)
10669  	MOVOU X2, -32(CX)(BX*1)
10670  	MOVOU X3, -16(CX)(BX*1)
10671  
10672  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
10673  	MOVQ DX, CX
10674  	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
10675  
10676  memmove_long_emit_remainder_encodeBetterBlockAsm8B:
10677  	LEAQ (CX)(SI*1), DX
10678  	MOVL SI, BX
10679  
10680  	// genMemMoveLong
10681  	MOVOU (AX), X0
10682  	MOVOU 16(AX), X1
10683  	MOVOU -32(AX)(BX*1), X2
10684  	MOVOU -16(AX)(BX*1), X3
10685  	MOVQ  BX, DI
10686  	SHRQ  $0x05, DI
10687  	MOVQ  CX, SI
10688  	ANDL  $0x0000001f, SI
10689  	MOVQ  $0x00000040, R8
10690  	SUBQ  SI, R8
10691  	DECQ  DI
10692  	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
10693  	LEAQ  -32(AX)(R8*1), SI
10694  	LEAQ  -32(CX)(R8*1), R9
10695  
10696  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
10697  	MOVOU (SI), X4
10698  	MOVOU 16(SI), X5
10699  	MOVOA X4, (R9)
10700  	MOVOA X5, 16(R9)
10701  	ADDQ  $0x20, R9
10702  	ADDQ  $0x20, SI
10703  	ADDQ  $0x20, R8
10704  	DECQ  DI
10705  	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
10706  
10707  emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
10708  	MOVOU -32(AX)(R8*1), X4
10709  	MOVOU -16(AX)(R8*1), X5
10710  	MOVOA X4, -32(CX)(R8*1)
10711  	MOVOA X5, -16(CX)(R8*1)
10712  	ADDQ  $0x20, R8
10713  	CMPQ  BX, R8
10714  	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
10715  	MOVOU X0, (CX)
10716  	MOVOU X1, 16(CX)
10717  	MOVOU X2, -32(CX)(BX*1)
10718  	MOVOU X3, -16(CX)(BX*1)
10719  	MOVQ  DX, CX
10720  
10721  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: // normal exit
10722  	MOVQ dst_base+0(FP), AX
10723  	SUBQ AX, CX // CX = output pointer - dst base = number of bytes written
10724  	MOVQ CX, ret+56(FP)
10725  	RET
10726  
10727  // func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
10728  // Requires: BMI, SSE2
10729  TEXT ·encodeSnappyBlockAsm(SB), $24-64
10730  	MOVQ tmp+48(FP), AX // AX = tmp, base of the hash table
10731  	MOVQ dst_base+0(FP), CX // CX = output write pointer, starts at dst
10732  	MOVQ $0x00000200, DX // 512 iterations x 128 bytes = 65536 bytes: all of tmp
10733  	MOVQ AX, BX
10734  	PXOR X0, X0 // X0 = 0
10735  
10736  zero_loop_encodeSnappyBlockAsm: // clear tmp, 128 bytes per iteration
10737  	MOVOU X0, (BX)
10738  	MOVOU X0, 16(BX)
10739  	MOVOU X0, 32(BX)
10740  	MOVOU X0, 48(BX)
10741  	MOVOU X0, 64(BX)
10742  	MOVOU X0, 80(BX)
10743  	MOVOU X0, 96(BX)
10744  	MOVOU X0, 112(BX)
10745  	ADDQ  $0x80, BX
10746  	DECQ  DX
10747  	JNZ   zero_loop_encodeSnappyBlockAsm
10748  	MOVL  $0x00000000, 12(SP) // 12(SP) = 0: start of the input not yet emitted
10749  	MOVQ  src_len+32(FP), DX
10750  	LEAQ  -9(DX), BX
10751  	LEAQ  -8(DX), SI
10752  	MOVL  SI, 8(SP) // 8(SP) = src_len - 8: limit for the search position
10753  	SHRQ  $0x05, DX
10754  	SUBL  DX, BX // BX = src_len - 9 - src_len/32
10755  	LEAQ  (CX)(BX*1), BX
10756  	MOVQ  BX, (SP) // (SP) = dst + BX: output limit tested before emitting
10757  	MOVL  $0x00000001, DX // DX = 1: first search position
10758  	MOVL  DX, 16(SP) // 16(SP) = 1: initial repeat offset
10759  	MOVQ  src_base+24(FP), BX // BX = src base pointer
10760  
10761  search_loop_encodeSnappyBlockAsm: // DX = current position; look for a repeat at DX+1, else hand three table candidates to no_repeat_found
10762  	MOVL  DX, SI
10763  	SUBL  12(SP), SI // SI = DX - start of pending literals
10764  	SHRL  $0x06, SI // extra skip: +1 for every 64 bytes without a match
10765  	LEAL  4(DX)(SI*1), SI // SI = DX + 4 + skip, the next position to try
10766  	CMPL  SI, 8(SP)
10767  	JAE   emit_remainder_encodeSnappyBlockAsm // next position at or past the limit in 8(SP): finish up
10768  	MOVQ  (BX)(DX*1), DI // DI = 8 source bytes at DX
10769  	MOVL  SI, 20(SP) // 20(SP) = next position, reloaded into DX when nothing matches
10770  	MOVQ  $0x0000cf1bbcdcbf9b, R9 // hash multiplier
10771  	MOVQ  DI, R10
10772  	MOVQ  DI, R11
10773  	SHRQ  $0x08, R11 // R11 = bytes starting at DX+1
10774  	SHLQ  $0x10, R10 // drop the top 2 bytes: hash covers 6 bytes
10775  	IMULQ R9, R10
10776  	SHRQ  $0x32, R10 // R10 = 14-bit hash at DX
10777  	SHLQ  $0x10, R11
10778  	IMULQ R9, R11
10779  	SHRQ  $0x32, R11 // R11 = 14-bit hash at DX+1
10780  	MOVL  (AX)(R10*4), SI // SI = candidate position for DX
10781  	MOVL  (AX)(R11*4), R8 // R8 = candidate position for DX+1
10782  	MOVL  DX, (AX)(R10*4) // table[hash(DX)] = DX
10783  	LEAL  1(DX), R10
10784  	MOVL  R10, (AX)(R11*4) // table[hash(DX+1)] = DX+1
10785  	MOVQ  DI, R10
10786  	SHRQ  $0x10, R10
10787  	SHLQ  $0x10, R10 // clear the low 2 bytes, leaving the bytes from DX+2 on
10788  	IMULQ R9, R10
10789  	SHRQ  $0x32, R10 // R10 = 14-bit hash at DX+2, looked up later in no_repeat_found
10790  	MOVL  DX, R9
10791  	SUBL  16(SP), R9 // R9 = DX - repeat offset
10792  	MOVL  1(BX)(R9*1), R11 // R11 = 4 bytes at DX + 1 - repeat offset
10793  	MOVQ  DI, R9
10794  	SHRQ  $0x08, R9 // low 4 bytes of R9 = the 4 bytes at DX+1
10795  	CMPL  R9, R11
10796  	JNE   no_repeat_found_encodeSnappyBlockAsm
10797  	LEAL  1(DX), DI // repeat found: DI = DX+1, start of the match
10798  	MOVL  12(SP), SI // SI = start of pending literals, lower bound for backward extension
10799  	MOVL  DI, R8
10800  	SUBL  16(SP), R8 // R8 = DI - repeat offset, position of the earlier copy
10801  	JZ    repeat_extend_back_end_encodeSnappyBlockAsm // earlier copy starts at 0: cannot extend backwards
10802  
10803  repeat_extend_back_loop_encodeSnappyBlockAsm:
10804  	CMPL DI, SI
10805  	JBE  repeat_extend_back_end_encodeSnappyBlockAsm
10806  	MOVB -1(BX)(R8*1), R9
10807  	MOVB -1(BX)(DI*1), R10
10808  	CMPB R9, R10
10809  	JNE  repeat_extend_back_end_encodeSnappyBlockAsm
10810  	LEAL -1(DI), DI
10811  	DECL R8
10812  	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm
10813  
10814  repeat_extend_back_end_encodeSnappyBlockAsm:
10815  	MOVL DI, SI
10816  	SUBL 12(SP), SI
10817  	LEAQ 5(CX)(SI*1), SI
10818  	CMPQ SI, (SP)
10819  	JB   repeat_dst_size_check_encodeSnappyBlockAsm
10820  	MOVQ $0x00000000, ret+56(FP)
10821  	RET
10822  
10823  repeat_dst_size_check_encodeSnappyBlockAsm:
10824  	MOVL 12(SP), SI
10825  	CMPL SI, DI
10826  	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
10827  	MOVL DI, R8
10828  	MOVL DI, 12(SP)
10829  	LEAQ (BX)(SI*1), R9
10830  	SUBL SI, R8
10831  	LEAL -1(R8), SI
10832  	CMPL SI, $0x3c
10833  	JB   one_byte_repeat_emit_encodeSnappyBlockAsm
10834  	CMPL SI, $0x00000100
10835  	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm
10836  	CMPL SI, $0x00010000
10837  	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm
10838  	CMPL SI, $0x01000000
10839  	JB   four_bytes_repeat_emit_encodeSnappyBlockAsm
10840  	MOVB $0xfc, (CX)
10841  	MOVL SI, 1(CX)
10842  	ADDQ $0x05, CX
10843  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
10844  
10845  four_bytes_repeat_emit_encodeSnappyBlockAsm:
10846  	MOVL SI, R10
10847  	SHRL $0x10, R10
10848  	MOVB $0xf8, (CX)
10849  	MOVW SI, 1(CX)
10850  	MOVB R10, 3(CX)
10851  	ADDQ $0x04, CX
10852  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
10853  
10854  three_bytes_repeat_emit_encodeSnappyBlockAsm:
10855  	MOVB $0xf4, (CX)
10856  	MOVW SI, 1(CX)
10857  	ADDQ $0x03, CX
10858  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
10859  
10860  two_bytes_repeat_emit_encodeSnappyBlockAsm:
10861  	MOVB $0xf0, (CX)
10862  	MOVB SI, 1(CX)
10863  	ADDQ $0x02, CX
10864  	CMPL SI, $0x40
10865  	JB   memmove_repeat_emit_encodeSnappyBlockAsm
10866  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
10867  
10868  one_byte_repeat_emit_encodeSnappyBlockAsm:
10869  	SHLB $0x02, SI
10870  	MOVB SI, (CX)
10871  	ADDQ $0x01, CX
10872  
10873  memmove_repeat_emit_encodeSnappyBlockAsm:
10874  	LEAQ (CX)(R8*1), SI
10875  
10876  	// genMemMoveShort
10877  	CMPQ R8, $0x08
10878  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
10879  	CMPQ R8, $0x10
10880  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
10881  	CMPQ R8, $0x20
10882  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
10883  	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
10884  
10885  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
10886  	MOVQ (R9), R10
10887  	MOVQ R10, (CX)
10888  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
10889  
10890  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
10891  	MOVQ (R9), R10
10892  	MOVQ -8(R9)(R8*1), R9
10893  	MOVQ R10, (CX)
10894  	MOVQ R9, -8(CX)(R8*1)
10895  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
10896  
10897  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
10898  	MOVOU (R9), X0
10899  	MOVOU -16(R9)(R8*1), X1
10900  	MOVOU X0, (CX)
10901  	MOVOU X1, -16(CX)(R8*1)
10902  	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
10903  
10904  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
10905  	MOVOU (R9), X0
10906  	MOVOU 16(R9), X1
10907  	MOVOU -32(R9)(R8*1), X2
10908  	MOVOU -16(R9)(R8*1), X3
10909  	MOVOU X0, (CX)
10910  	MOVOU X1, 16(CX)
10911  	MOVOU X2, -32(CX)(R8*1)
10912  	MOVOU X3, -16(CX)(R8*1)
10913  
10914  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
10915  	MOVQ SI, CX
10916  	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
10917  
10918  memmove_long_repeat_emit_encodeSnappyBlockAsm:
10919  	LEAQ (CX)(R8*1), SI
10920  
10921  	// genMemMoveLong
10922  	MOVOU (R9), X0
10923  	MOVOU 16(R9), X1
10924  	MOVOU -32(R9)(R8*1), X2
10925  	MOVOU -16(R9)(R8*1), X3
10926  	MOVQ  R8, R11
10927  	SHRQ  $0x05, R11
10928  	MOVQ  CX, R10
10929  	ANDL  $0x0000001f, R10
10930  	MOVQ  $0x00000040, R12
10931  	SUBQ  R10, R12
10932  	DECQ  R11
10933  	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
10934  	LEAQ  -32(R9)(R12*1), R10
10935  	LEAQ  -32(CX)(R12*1), R13
10936  
10937  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
10938  	MOVOU (R10), X4
10939  	MOVOU 16(R10), X5
10940  	MOVOA X4, (R13)
10941  	MOVOA X5, 16(R13)
10942  	ADDQ  $0x20, R13
10943  	ADDQ  $0x20, R10
10944  	ADDQ  $0x20, R12
10945  	DECQ  R11
10946  	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
10947  
10948  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
10949  	MOVOU -32(R9)(R12*1), X4
10950  	MOVOU -16(R9)(R12*1), X5
10951  	MOVOA X4, -32(CX)(R12*1)
10952  	MOVOA X5, -16(CX)(R12*1)
10953  	ADDQ  $0x20, R12
10954  	CMPQ  R8, R12
10955  	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
10956  	MOVOU X0, (CX)
10957  	MOVOU X1, 16(CX)
10958  	MOVOU X2, -32(CX)(R8*1)
10959  	MOVOU X3, -16(CX)(R8*1)
10960  	MOVQ  SI, CX
10961  
10962  emit_literal_done_repeat_emit_encodeSnappyBlockAsm: // literals done; extend the repeat match forwards
10963  	ADDL $0x05, DX // DX += 5: one byte to reach DX+1 plus the 4 bytes already compared there in search_loop
10964  	MOVL DX, SI
10965  	SUBL 16(SP), SI // SI = DX - repeat offset
10966  	MOVQ src_len+32(FP), R8
10967  	SUBL DX, R8 // R8 = bytes left in src from DX
10968  	LEAQ (BX)(DX*1), R9 // R9 = address of src[DX]
10969  	LEAQ (BX)(SI*1), SI // SI = address of src[DX - repeat offset]
10970  
10971  	// matchLen: R11 = number of equal leading bytes at R9 and SI, examining at most R8 bytes
10972  	XORL R11, R11
10973  
10974  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: // compare 16 bytes per iteration
10975  	CMPL R8, $0x10
10976  	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm // fewer than 16 bytes left
10977  	MOVQ (R9)(R11*1), R10
10978  	MOVQ 8(R9)(R11*1), R12
10979  	XORQ (SI)(R11*1), R10 // nonzero iff the first 8 bytes differ
10980  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
10981  	XORQ 8(SI)(R11*1), R12 // nonzero iff the second 8 bytes differ
10982  	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm
10983  	LEAL -16(R8), R8
10984  	LEAL 16(R11), R11
10985  	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm
10986  
10987  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: // mismatch inside the second quadword
10988  #ifdef GOAMD64_v3
10989  	TZCNTQ R12, R12 // index of the lowest set bit = first differing bit
10990  
10991  #else
10992  	BSFQ R12, R12 // same result; R12 is known nonzero here
10993  
10994  #endif
10995  	SARQ $0x03, R12 // bit index -> count of whole matching bytes
10996  	LEAL 8(R11)(R12*1), R11 // plus 8 for the first quadword, which matched
10997  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
10998  
10999  matchlen_match8_repeat_extend_encodeSnappyBlockAsm: // one 8-byte step
11000  	CMPL R8, $0x08
11001  	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm
11002  	MOVQ (R9)(R11*1), R10
11003  	XORQ (SI)(R11*1), R10
11004  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
11005  	LEAL -8(R8), R8
11006  	LEAL 8(R11), R11
11007  	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm
11008  
11009  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: // mismatch inside the quadword held (XORed) in R10
11010  #ifdef GOAMD64_v3
11011  	TZCNTQ R10, R10
11012  
11013  #else
11014  	BSFQ R10, R10
11015  
11016  #endif
11017  	SARQ $0x03, R10 // bit index -> count of whole matching bytes
11018  	LEAL (R11)(R10*1), R11
11019  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
11020  
11021  matchlen_match4_repeat_extend_encodeSnappyBlockAsm: // one 4-byte step
11022  	CMPL R8, $0x04
11023  	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm
11024  	MOVL (R9)(R11*1), R10
11025  	CMPL (SI)(R11*1), R10
11026  	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm // differ somewhere in these 4 bytes: narrow down with 2- and 1-byte compares
11027  	LEAL -4(R8), R8
11028  	LEAL 4(R11), R11
11029  
11030  matchlen_match2_repeat_extend_encodeSnappyBlockAsm: // one 2-byte step
11031  	CMPL R8, $0x01
11032  	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm // exactly one byte left
11033  	JB   repeat_extend_forward_end_encodeSnappyBlockAsm // nothing left
11034  	MOVW (R9)(R11*1), R10
11035  	CMPW (SI)(R11*1), R10
11036  	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm // the first of the two bytes may still match
11037  	LEAL 2(R11), R11
11038  	SUBL $0x02, R8
11039  	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm
11040  
11041  matchlen_match1_repeat_extend_encodeSnappyBlockAsm: // final single-byte compare
11042  	MOVB (R9)(R11*1), R10
11043  	CMPB (SI)(R11*1), R10
11044  	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm
11045  	LEAL 1(R11), R11
11047  repeat_extend_forward_end_encodeSnappyBlockAsm:
11048  	ADDL R11, DX
11049  	MOVL DX, SI
11050  	SUBL DI, SI
11051  	MOVL 16(SP), DI
11052  
11053  	// emitCopy
11054  	CMPL DI, $0x00010000
11055  	JB   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
11056  
11057  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
11058  	CMPL SI, $0x40
11059  	JBE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
11060  	MOVB $0xff, (CX)
11061  	MOVL DI, 1(CX)
11062  	LEAL -64(SI), SI
11063  	ADDQ $0x05, CX
11064  	CMPL SI, $0x04
11065  	JB   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
11066  	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
11067  
11068  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
11069  	TESTL SI, SI
11070  	JZ    repeat_end_emit_encodeSnappyBlockAsm
11071  	XORL  R8, R8
11072  	LEAL  -1(R8)(SI*4), SI
11073  	MOVB  SI, (CX)
11074  	MOVL  DI, 1(CX)
11075  	ADDQ  $0x05, CX
11076  	JMP   repeat_end_emit_encodeSnappyBlockAsm
11077  
11078  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
11079  	CMPL SI, $0x40
11080  	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
11081  	MOVB $0xee, (CX)
11082  	MOVW DI, 1(CX)
11083  	LEAL -60(SI), SI
11084  	ADDQ $0x03, CX
11085  	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
11086  
11087  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
11088  	MOVL SI, R8
11089  	SHLL $0x02, R8
11090  	CMPL SI, $0x0c
11091  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
11092  	CMPL DI, $0x00000800
11093  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
11094  	LEAL -15(R8), R8
11095  	MOVB DI, 1(CX)
11096  	SHRL $0x08, DI
11097  	SHLL $0x05, DI
11098  	ORL  DI, R8
11099  	MOVB R8, (CX)
11100  	ADDQ $0x02, CX
11101  	JMP  repeat_end_emit_encodeSnappyBlockAsm
11102  
11103  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
11104  	LEAL -2(R8), R8
11105  	MOVB R8, (CX)
11106  	MOVW DI, 1(CX)
11107  	ADDQ $0x03, CX
11108  
11109  repeat_end_emit_encodeSnappyBlockAsm:
11110  	MOVL DX, 12(SP)
11111  	JMP  search_loop_encodeSnappyBlockAsm
11112  
11113  no_repeat_found_encodeSnappyBlockAsm:
11114  	CMPL (BX)(SI*1), DI
11115  	JEQ  candidate_match_encodeSnappyBlockAsm
11116  	SHRQ $0x08, DI
11117  	MOVL (AX)(R10*4), SI
11118  	LEAL 2(DX), R9
11119  	CMPL (BX)(R8*1), DI
11120  	JEQ  candidate2_match_encodeSnappyBlockAsm
11121  	MOVL R9, (AX)(R10*4)
11122  	SHRQ $0x08, DI
11123  	CMPL (BX)(SI*1), DI
11124  	JEQ  candidate3_match_encodeSnappyBlockAsm
11125  	MOVL 20(SP), DX
11126  	JMP  search_loop_encodeSnappyBlockAsm
11127  
11128  candidate3_match_encodeSnappyBlockAsm:
11129  	ADDL $0x02, DX
11130  	JMP  candidate_match_encodeSnappyBlockAsm
11131  
11132  candidate2_match_encodeSnappyBlockAsm:
11133  	MOVL R9, (AX)(R10*4)
11134  	INCL DX
11135  	MOVL R8, SI
11136  
11137  candidate_match_encodeSnappyBlockAsm:
11138  	MOVL  12(SP), DI
11139  	TESTL SI, SI
11140  	JZ    match_extend_back_end_encodeSnappyBlockAsm
11141  
11142  match_extend_back_loop_encodeSnappyBlockAsm:
11143  	CMPL DX, DI
11144  	JBE  match_extend_back_end_encodeSnappyBlockAsm
11145  	MOVB -1(BX)(SI*1), R8
11146  	MOVB -1(BX)(DX*1), R9
11147  	CMPB R8, R9
11148  	JNE  match_extend_back_end_encodeSnappyBlockAsm
11149  	LEAL -1(DX), DX
11150  	DECL SI
11151  	JZ   match_extend_back_end_encodeSnappyBlockAsm
11152  	JMP  match_extend_back_loop_encodeSnappyBlockAsm
11153  
11154  match_extend_back_end_encodeSnappyBlockAsm:
11155  	MOVL DX, DI
11156  	SUBL 12(SP), DI
11157  	LEAQ 5(CX)(DI*1), DI
11158  	CMPQ DI, (SP)
11159  	JB   match_dst_size_check_encodeSnappyBlockAsm
11160  	MOVQ $0x00000000, ret+56(FP)
11161  	RET
11162  
11163  match_dst_size_check_encodeSnappyBlockAsm:
11164  	MOVL DX, DI
11165  	MOVL 12(SP), R8
11166  	CMPL R8, DI
11167  	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm
11168  	MOVL DI, R9
11169  	MOVL DI, 12(SP)
11170  	LEAQ (BX)(R8*1), DI
11171  	SUBL R8, R9
11172  	LEAL -1(R9), R8
11173  	CMPL R8, $0x3c
11174  	JB   one_byte_match_emit_encodeSnappyBlockAsm
11175  	CMPL R8, $0x00000100
11176  	JB   two_bytes_match_emit_encodeSnappyBlockAsm
11177  	CMPL R8, $0x00010000
11178  	JB   three_bytes_match_emit_encodeSnappyBlockAsm
11179  	CMPL R8, $0x01000000
11180  	JB   four_bytes_match_emit_encodeSnappyBlockAsm
11181  	MOVB $0xfc, (CX)
11182  	MOVL R8, 1(CX)
11183  	ADDQ $0x05, CX
11184  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
11185  
11186  four_bytes_match_emit_encodeSnappyBlockAsm:
11187  	MOVL R8, R10
11188  	SHRL $0x10, R10
11189  	MOVB $0xf8, (CX)
11190  	MOVW R8, 1(CX)
11191  	MOVB R10, 3(CX)
11192  	ADDQ $0x04, CX
11193  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
11194  
11195  three_bytes_match_emit_encodeSnappyBlockAsm:
11196  	MOVB $0xf4, (CX)
11197  	MOVW R8, 1(CX)
11198  	ADDQ $0x03, CX
11199  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
11200  
11201  two_bytes_match_emit_encodeSnappyBlockAsm:
11202  	MOVB $0xf0, (CX)
11203  	MOVB R8, 1(CX)
11204  	ADDQ $0x02, CX
11205  	CMPL R8, $0x40
11206  	JB   memmove_match_emit_encodeSnappyBlockAsm
11207  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
11208  
11209  one_byte_match_emit_encodeSnappyBlockAsm:
11210  	SHLB $0x02, R8
11211  	MOVB R8, (CX)
11212  	ADDQ $0x01, CX
11213  
11214  memmove_match_emit_encodeSnappyBlockAsm:
11215  	LEAQ (CX)(R9*1), R8
11216  
11217  	// genMemMoveShort
11218  	CMPQ R9, $0x08
11219  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
11220  	CMPQ R9, $0x10
11221  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
11222  	CMPQ R9, $0x20
11223  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
11224  	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
11225  
11226  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
11227  	MOVQ (DI), R10
11228  	MOVQ R10, (CX)
11229  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
11230  
11231  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
11232  	MOVQ (DI), R10
11233  	MOVQ -8(DI)(R9*1), DI
11234  	MOVQ R10, (CX)
11235  	MOVQ DI, -8(CX)(R9*1)
11236  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
11237  
11238  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
11239  	MOVOU (DI), X0
11240  	MOVOU -16(DI)(R9*1), X1
11241  	MOVOU X0, (CX)
11242  	MOVOU X1, -16(CX)(R9*1)
11243  	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm
11244  
11245  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
11246  	MOVOU (DI), X0
11247  	MOVOU 16(DI), X1
11248  	MOVOU -32(DI)(R9*1), X2
11249  	MOVOU -16(DI)(R9*1), X3
11250  	MOVOU X0, (CX)
11251  	MOVOU X1, 16(CX)
11252  	MOVOU X2, -32(CX)(R9*1)
11253  	MOVOU X3, -16(CX)(R9*1)
11254  
11255  memmove_end_copy_match_emit_encodeSnappyBlockAsm:
11256  	MOVQ R8, CX
11257  	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm
11258  
11259  memmove_long_match_emit_encodeSnappyBlockAsm:
11260  	LEAQ (CX)(R9*1), R8
11261  
11262  	// genMemMoveLong
11263  	MOVOU (DI), X0
11264  	MOVOU 16(DI), X1
11265  	MOVOU -32(DI)(R9*1), X2
11266  	MOVOU -16(DI)(R9*1), X3
11267  	MOVQ  R9, R11
11268  	SHRQ  $0x05, R11
11269  	MOVQ  CX, R10
11270  	ANDL  $0x0000001f, R10
11271  	MOVQ  $0x00000040, R12
11272  	SUBQ  R10, R12
11273  	DECQ  R11
11274  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
11275  	LEAQ  -32(DI)(R12*1), R10
11276  	LEAQ  -32(CX)(R12*1), R13
11277  
11278  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
11279  	MOVOU (R10), X4
11280  	MOVOU 16(R10), X5
11281  	MOVOA X4, (R13)
11282  	MOVOA X5, 16(R13)
11283  	ADDQ  $0x20, R13
11284  	ADDQ  $0x20, R10
11285  	ADDQ  $0x20, R12
11286  	DECQ  R11
11287  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
11288  
11289  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
11290  	MOVOU -32(DI)(R12*1), X4
11291  	MOVOU -16(DI)(R12*1), X5
11292  	MOVOA X4, -32(CX)(R12*1)
11293  	MOVOA X5, -16(CX)(R12*1)
11294  	ADDQ  $0x20, R12
11295  	CMPQ  R9, R12
11296  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
11297  	MOVOU X0, (CX)
11298  	MOVOU X1, 16(CX)
11299  	MOVOU X2, -32(CX)(R9*1)
11300  	MOVOU X3, -16(CX)(R9*1)
11301  	MOVQ  R8, CX
11302  
11303  emit_literal_done_match_emit_encodeSnappyBlockAsm:
11304  match_nolit_loop_encodeSnappyBlockAsm:
11305  	MOVL DX, DI
11306  	SUBL SI, DI
11307  	MOVL DI, 16(SP)
11308  	ADDL $0x04, DX
11309  	ADDL $0x04, SI
11310  	MOVQ src_len+32(FP), DI
11311  	SUBL DX, DI
11312  	LEAQ (BX)(DX*1), R8
11313  	LEAQ (BX)(SI*1), SI
11314  
11315  	// matchLen
11316  	XORL R10, R10
11317  
11318  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm:
11319  	CMPL DI, $0x10
11320  	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm
11321  	MOVQ (R8)(R10*1), R9
11322  	MOVQ 8(R8)(R10*1), R11
11323  	XORQ (SI)(R10*1), R9
11324  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
11325  	XORQ 8(SI)(R10*1), R11
11326  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm
11327  	LEAL -16(DI), DI
11328  	LEAL 16(R10), R10
11329  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm
11330  
11331  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm:
11332  #ifdef GOAMD64_v3
11333  	TZCNTQ R11, R11
11334  
11335  #else
11336  	BSFQ R11, R11
11337  
11338  #endif
11339  	SARQ $0x03, R11
11340  	LEAL 8(R10)(R11*1), R10
11341  	JMP  match_nolit_end_encodeSnappyBlockAsm
11342  
11343  matchlen_match8_match_nolit_encodeSnappyBlockAsm:
11344  	CMPL DI, $0x08
11345  	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm
11346  	MOVQ (R8)(R10*1), R9
11347  	XORQ (SI)(R10*1), R9
11348  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
11349  	LEAL -8(DI), DI
11350  	LEAL 8(R10), R10
11351  	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm
11352  
11353  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm:
11354  #ifdef GOAMD64_v3
11355  	TZCNTQ R9, R9
11356  
11357  #else
11358  	BSFQ R9, R9
11359  
11360  #endif
11361  	SARQ $0x03, R9
11362  	LEAL (R10)(R9*1), R10
11363  	JMP  match_nolit_end_encodeSnappyBlockAsm
11364  
11365  matchlen_match4_match_nolit_encodeSnappyBlockAsm:
11366  	CMPL DI, $0x04
11367  	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm
11368  	MOVL (R8)(R10*1), R9
11369  	CMPL (SI)(R10*1), R9
11370  	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm
11371  	LEAL -4(DI), DI
11372  	LEAL 4(R10), R10
11373  
11374  matchlen_match2_match_nolit_encodeSnappyBlockAsm:
11375  	CMPL DI, $0x01
11376  	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm
11377  	JB   match_nolit_end_encodeSnappyBlockAsm
11378  	MOVW (R8)(R10*1), R9
11379  	CMPW (SI)(R10*1), R9
11380  	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm
11381  	LEAL 2(R10), R10
11382  	SUBL $0x02, DI
11383  	JZ   match_nolit_end_encodeSnappyBlockAsm
11384  
11385  matchlen_match1_match_nolit_encodeSnappyBlockAsm:
11386  	MOVB (R8)(R10*1), R9
11387  	CMPB (SI)(R10*1), R9
11388  	JNE  match_nolit_end_encodeSnappyBlockAsm
11389  	LEAL 1(R10), R10
11390  
11391  match_nolit_end_encodeSnappyBlockAsm:
11392  	ADDL R10, DX
11393  	MOVL 16(SP), SI
11394  	ADDL $0x04, R10
11395  	MOVL DX, 12(SP)
11396  
11397  	// emitCopy
11398  	CMPL SI, $0x00010000
11399  	JB   two_byte_offset_match_nolit_encodeSnappyBlockAsm
11400  
11401  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
11402  	CMPL R10, $0x40
11403  	JBE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm
11404  	MOVB $0xff, (CX)
11405  	MOVL SI, 1(CX)
11406  	LEAL -64(R10), R10
11407  	ADDQ $0x05, CX
11408  	CMPL R10, $0x04
11409  	JB   four_bytes_remain_match_nolit_encodeSnappyBlockAsm
11410  	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
11411  
11412  four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
11413  	TESTL R10, R10
11414  	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm
11415  	XORL  DI, DI
11416  	LEAL  -1(DI)(R10*4), R10
11417  	MOVB  R10, (CX)
11418  	MOVL  SI, 1(CX)
11419  	ADDQ  $0x05, CX
11420  	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm
11421  
11422  two_byte_offset_match_nolit_encodeSnappyBlockAsm:
11423  	CMPL R10, $0x40
11424  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
11425  	MOVB $0xee, (CX)
11426  	MOVW SI, 1(CX)
11427  	LEAL -60(R10), R10
11428  	ADDQ $0x03, CX
11429  	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm
11430  
11431  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
11432  	MOVL R10, DI
11433  	SHLL $0x02, DI
11434  	CMPL R10, $0x0c
11435  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
11436  	CMPL SI, $0x00000800
11437  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
11438  	LEAL -15(DI), DI
11439  	MOVB SI, 1(CX)
11440  	SHRL $0x08, SI
11441  	SHLL $0x05, SI
11442  	ORL  SI, DI
11443  	MOVB DI, (CX)
11444  	ADDQ $0x02, CX
11445  	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm
11446  
11447  emit_copy_three_match_nolit_encodeSnappyBlockAsm:
11448  	LEAL -2(DI), DI
11449  	MOVB DI, (CX)
11450  	MOVW SI, 1(CX)
11451  	ADDQ $0x03, CX
11452  
11453  match_nolit_emitcopy_end_encodeSnappyBlockAsm:
11454  	CMPL DX, 8(SP)
11455  	JAE  emit_remainder_encodeSnappyBlockAsm
11456  	MOVQ -2(BX)(DX*1), DI
11457  	CMPQ CX, (SP)
11458  	JB   match_nolit_dst_ok_encodeSnappyBlockAsm
11459  	MOVQ $0x00000000, ret+56(FP)
11460  	RET
11461  
11462  match_nolit_dst_ok_encodeSnappyBlockAsm:
11463  	MOVQ  $0x0000cf1bbcdcbf9b, R9
11464  	MOVQ  DI, R8
11465  	SHRQ  $0x10, DI
11466  	MOVQ  DI, SI
11467  	SHLQ  $0x10, R8
11468  	IMULQ R9, R8
11469  	SHRQ  $0x32, R8
11470  	SHLQ  $0x10, SI
11471  	IMULQ R9, SI
11472  	SHRQ  $0x32, SI
11473  	LEAL  -2(DX), R9
11474  	LEAQ  (AX)(SI*4), R10
11475  	MOVL  (R10), SI
11476  	MOVL  R9, (AX)(R8*4)
11477  	MOVL  DX, (R10)
11478  	CMPL  (BX)(SI*1), DI
11479  	JEQ   match_nolit_loop_encodeSnappyBlockAsm
11480  	INCL  DX
11481  	JMP   search_loop_encodeSnappyBlockAsm
11482  
11483  emit_remainder_encodeSnappyBlockAsm:
11484  	MOVQ src_len+32(FP), AX
11485  	SUBL 12(SP), AX
11486  	LEAQ 5(CX)(AX*1), AX
11487  	CMPQ AX, (SP)
11488  	JB   emit_remainder_ok_encodeSnappyBlockAsm
11489  	MOVQ $0x00000000, ret+56(FP)
11490  	RET
11491  
11492  emit_remainder_ok_encodeSnappyBlockAsm:
11493  	MOVQ src_len+32(FP), AX
11494  	MOVL 12(SP), DX
11495  	CMPL DX, AX
11496  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
11497  	MOVL AX, SI
11498  	MOVL AX, 12(SP)
11499  	LEAQ (BX)(DX*1), AX
11500  	SUBL DX, SI
11501  	LEAL -1(SI), DX
11502  	CMPL DX, $0x3c
11503  	JB   one_byte_emit_remainder_encodeSnappyBlockAsm
11504  	CMPL DX, $0x00000100
11505  	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm
11506  	CMPL DX, $0x00010000
11507  	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm
11508  	CMPL DX, $0x01000000
11509  	JB   four_bytes_emit_remainder_encodeSnappyBlockAsm
11510  	MOVB $0xfc, (CX)
11511  	MOVL DX, 1(CX)
11512  	ADDQ $0x05, CX
11513  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
11514  
11515  four_bytes_emit_remainder_encodeSnappyBlockAsm:
11516  	MOVL DX, BX
11517  	SHRL $0x10, BX
11518  	MOVB $0xf8, (CX)
11519  	MOVW DX, 1(CX)
11520  	MOVB BL, 3(CX)
11521  	ADDQ $0x04, CX
11522  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
11523  
11524  three_bytes_emit_remainder_encodeSnappyBlockAsm:
11525  	MOVB $0xf4, (CX)
11526  	MOVW DX, 1(CX)
11527  	ADDQ $0x03, CX
11528  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
11529  
11530  two_bytes_emit_remainder_encodeSnappyBlockAsm:
11531  	MOVB $0xf0, (CX)
11532  	MOVB DL, 1(CX)
11533  	ADDQ $0x02, CX
11534  	CMPL DX, $0x40
11535  	JB   memmove_emit_remainder_encodeSnappyBlockAsm
11536  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
11537  
11538  one_byte_emit_remainder_encodeSnappyBlockAsm:
11539  	SHLB $0x02, DL
11540  	MOVB DL, (CX)
11541  	ADDQ $0x01, CX
11542  
11543  memmove_emit_remainder_encodeSnappyBlockAsm:
11544  	LEAQ (CX)(SI*1), DX
11545  	MOVL SI, BX
11546  
11547  	// genMemMoveShort
11548  	CMPQ BX, $0x03
11549  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
11550  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
11551  	CMPQ BX, $0x08
11552  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7
11553  	CMPQ BX, $0x10
11554  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
11555  	CMPQ BX, $0x20
11556  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
11557  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
11558  
11559  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
11560  	MOVB (AX), SI
11561  	MOVB -1(AX)(BX*1), AL
11562  	MOVB SI, (CX)
11563  	MOVB AL, -1(CX)(BX*1)
11564  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
11565  
11566  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
11567  	MOVW (AX), SI
11568  	MOVB 2(AX), AL
11569  	MOVW SI, (CX)
11570  	MOVB AL, 2(CX)
11571  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
11572  
11573  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
11574  	MOVL (AX), SI
11575  	MOVL -4(AX)(BX*1), AX
11576  	MOVL SI, (CX)
11577  	MOVL AX, -4(CX)(BX*1)
11578  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
11579  
11580  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
11581  	MOVQ (AX), SI
11582  	MOVQ -8(AX)(BX*1), AX
11583  	MOVQ SI, (CX)
11584  	MOVQ AX, -8(CX)(BX*1)
11585  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
11586  
11587  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
11588  	MOVOU (AX), X0
11589  	MOVOU -16(AX)(BX*1), X1
11590  	MOVOU X0, (CX)
11591  	MOVOU X1, -16(CX)(BX*1)
11592  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
11593  
11594  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
11595  	MOVOU (AX), X0
11596  	MOVOU 16(AX), X1
11597  	MOVOU -32(AX)(BX*1), X2
11598  	MOVOU -16(AX)(BX*1), X3
11599  	MOVOU X0, (CX)
11600  	MOVOU X1, 16(CX)
11601  	MOVOU X2, -32(CX)(BX*1)
11602  	MOVOU X3, -16(CX)(BX*1)
11603  
11604  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
11605  	MOVQ DX, CX
11606  	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
11607  
11608  memmove_long_emit_remainder_encodeSnappyBlockAsm:
11609  	LEAQ (CX)(SI*1), DX
11610  	MOVL SI, BX
11611  
11612  	// genMemMoveLong
11613  	MOVOU (AX), X0
11614  	MOVOU 16(AX), X1
11615  	MOVOU -32(AX)(BX*1), X2
11616  	MOVOU -16(AX)(BX*1), X3
11617  	MOVQ  BX, DI
11618  	SHRQ  $0x05, DI
11619  	MOVQ  CX, SI
11620  	ANDL  $0x0000001f, SI
11621  	MOVQ  $0x00000040, R8
11622  	SUBQ  SI, R8
11623  	DECQ  DI
11624  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
11625  	LEAQ  -32(AX)(R8*1), SI
11626  	LEAQ  -32(CX)(R8*1), R9
11627  
11628  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
11629  	MOVOU (SI), X4
11630  	MOVOU 16(SI), X5
11631  	MOVOA X4, (R9)
11632  	MOVOA X5, 16(R9)
11633  	ADDQ  $0x20, R9
11634  	ADDQ  $0x20, SI
11635  	ADDQ  $0x20, R8
11636  	DECQ  DI
11637  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
11638  
11639  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
11640  	MOVOU -32(AX)(R8*1), X4
11641  	MOVOU -16(AX)(R8*1), X5
11642  	MOVOA X4, -32(CX)(R8*1)
11643  	MOVOA X5, -16(CX)(R8*1)
11644  	ADDQ  $0x20, R8
11645  	CMPQ  BX, R8
11646  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
11647  	MOVOU X0, (CX)
11648  	MOVOU X1, 16(CX)
11649  	MOVOU X2, -32(CX)(BX*1)
11650  	MOVOU X3, -16(CX)(BX*1)
11651  	MOVQ  DX, CX
11652  
11653  emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
11654  	MOVQ dst_base+0(FP), AX
11655  	SUBQ AX, CX
11656  	MOVQ CX, ret+56(FP)
11657  	RET
11658  
11659  // func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int
11660  // Requires: BMI, SSE2
11661  TEXT ·encodeSnappyBlockAsm64K(SB), $24-64
11662  	MOVQ tmp+48(FP), AX
11663  	MOVQ dst_base+0(FP), CX
11664  	MOVQ $0x00000200, DX
11665  	MOVQ AX, BX
11666  	PXOR X0, X0
11667  
11668  zero_loop_encodeSnappyBlockAsm64K:
11669  	MOVOU X0, (BX)
11670  	MOVOU X0, 16(BX)
11671  	MOVOU X0, 32(BX)
11672  	MOVOU X0, 48(BX)
11673  	MOVOU X0, 64(BX)
11674  	MOVOU X0, 80(BX)
11675  	MOVOU X0, 96(BX)
11676  	MOVOU X0, 112(BX)
11677  	ADDQ  $0x80, BX
11678  	DECQ  DX
11679  	JNZ   zero_loop_encodeSnappyBlockAsm64K
11680  	MOVL  $0x00000000, 12(SP)
11681  	MOVQ  src_len+32(FP), DX
11682  	LEAQ  -9(DX), BX
11683  	LEAQ  -8(DX), SI
11684  	MOVL  SI, 8(SP)
11685  	SHRQ  $0x05, DX
11686  	SUBL  DX, BX
11687  	LEAQ  (CX)(BX*1), BX
11688  	MOVQ  BX, (SP)
11689  	MOVL  $0x00000001, DX
11690  	MOVL  DX, 16(SP)
11691  	MOVQ  src_base+24(FP), BX
11692  
11693  search_loop_encodeSnappyBlockAsm64K:
11694  	MOVL  DX, SI
11695  	SUBL  12(SP), SI
11696  	SHRL  $0x06, SI
11697  	LEAL  4(DX)(SI*1), SI
11698  	CMPL  SI, 8(SP)
11699  	JAE   emit_remainder_encodeSnappyBlockAsm64K
11700  	MOVQ  (BX)(DX*1), DI
11701  	MOVL  SI, 20(SP)
11702  	MOVQ  $0x0000cf1bbcdcbf9b, R9
11703  	MOVQ  DI, R10
11704  	MOVQ  DI, R11
11705  	SHRQ  $0x08, R11
11706  	SHLQ  $0x10, R10
11707  	IMULQ R9, R10
11708  	SHRQ  $0x32, R10
11709  	SHLQ  $0x10, R11
11710  	IMULQ R9, R11
11711  	SHRQ  $0x32, R11
11712  	MOVL  (AX)(R10*4), SI
11713  	MOVL  (AX)(R11*4), R8
11714  	MOVL  DX, (AX)(R10*4)
11715  	LEAL  1(DX), R10
11716  	MOVL  R10, (AX)(R11*4)
11717  	MOVQ  DI, R10
11718  	SHRQ  $0x10, R10
11719  	SHLQ  $0x10, R10
11720  	IMULQ R9, R10
11721  	SHRQ  $0x32, R10
11722  	MOVL  DX, R9
11723  	SUBL  16(SP), R9
11724  	MOVL  1(BX)(R9*1), R11
11725  	MOVQ  DI, R9
11726  	SHRQ  $0x08, R9
11727  	CMPL  R9, R11
11728  	JNE   no_repeat_found_encodeSnappyBlockAsm64K
11729  	LEAL  1(DX), DI
11730  	MOVL  12(SP), SI
11731  	MOVL  DI, R8
11732  	SUBL  16(SP), R8
11733  	JZ    repeat_extend_back_end_encodeSnappyBlockAsm64K
11734  
11735  repeat_extend_back_loop_encodeSnappyBlockAsm64K:
11736  	CMPL DI, SI
11737  	JBE  repeat_extend_back_end_encodeSnappyBlockAsm64K
11738  	MOVB -1(BX)(R8*1), R9
11739  	MOVB -1(BX)(DI*1), R10
11740  	CMPB R9, R10
11741  	JNE  repeat_extend_back_end_encodeSnappyBlockAsm64K
11742  	LEAL -1(DI), DI
11743  	DECL R8
11744  	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm64K
11745  
11746  repeat_extend_back_end_encodeSnappyBlockAsm64K:
11747  	MOVL DI, SI
11748  	SUBL 12(SP), SI
11749  	LEAQ 3(CX)(SI*1), SI
11750  	CMPQ SI, (SP)
11751  	JB   repeat_dst_size_check_encodeSnappyBlockAsm64K
11752  	MOVQ $0x00000000, ret+56(FP)
11753  	RET
11754  
11755  repeat_dst_size_check_encodeSnappyBlockAsm64K:
11756  	MOVL 12(SP), SI
11757  	CMPL SI, DI
11758  	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
11759  	MOVL DI, R8
11760  	MOVL DI, 12(SP)
11761  	LEAQ (BX)(SI*1), R9
11762  	SUBL SI, R8
11763  	LEAL -1(R8), SI
11764  	CMPL SI, $0x3c
11765  	JB   one_byte_repeat_emit_encodeSnappyBlockAsm64K
11766  	CMPL SI, $0x00000100
11767  	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm64K
11768  	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm64K
11769  
11770  three_bytes_repeat_emit_encodeSnappyBlockAsm64K:
11771  	MOVB $0xf4, (CX)
11772  	MOVW SI, 1(CX)
11773  	ADDQ $0x03, CX
11774  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
11775  
11776  two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
11777  	MOVB $0xf0, (CX)
11778  	MOVB SI, 1(CX)
11779  	ADDQ $0x02, CX
11780  	CMPL SI, $0x40
11781  	JB   memmove_repeat_emit_encodeSnappyBlockAsm64K
11782  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
11783  
11784  one_byte_repeat_emit_encodeSnappyBlockAsm64K:
11785  	SHLB $0x02, SI
11786  	MOVB SI, (CX)
11787  	ADDQ $0x01, CX
11788  
11789  memmove_repeat_emit_encodeSnappyBlockAsm64K:
11790  	LEAQ (CX)(R8*1), SI
11791  
11792  	// genMemMoveShort
11793  	CMPQ R8, $0x08
11794  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
11795  	CMPQ R8, $0x10
11796  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
11797  	CMPQ R8, $0x20
11798  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
11799  	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
11800  
11801  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
11802  	MOVQ (R9), R10
11803  	MOVQ R10, (CX)
11804  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
11805  
11806  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
11807  	MOVQ (R9), R10
11808  	MOVQ -8(R9)(R8*1), R9
11809  	MOVQ R10, (CX)
11810  	MOVQ R9, -8(CX)(R8*1)
11811  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
11812  
11813  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
11814  	MOVOU (R9), X0
11815  	MOVOU -16(R9)(R8*1), X1
11816  	MOVOU X0, (CX)
11817  	MOVOU X1, -16(CX)(R8*1)
11818  	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
11819  
11820  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
11821  	MOVOU (R9), X0
11822  	MOVOU 16(R9), X1
11823  	MOVOU -32(R9)(R8*1), X2
11824  	MOVOU -16(R9)(R8*1), X3
11825  	MOVOU X0, (CX)
11826  	MOVOU X1, 16(CX)
11827  	MOVOU X2, -32(CX)(R8*1)
11828  	MOVOU X3, -16(CX)(R8*1)
11829  
11830  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
11831  	MOVQ SI, CX
11832  	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
11833  
11834  memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
11835  	LEAQ (CX)(R8*1), SI
11836  
11837  	// genMemMoveLong
11838  	MOVOU (R9), X0
11839  	MOVOU 16(R9), X1
11840  	MOVOU -32(R9)(R8*1), X2
11841  	MOVOU -16(R9)(R8*1), X3
11842  	MOVQ  R8, R11
11843  	SHRQ  $0x05, R11
11844  	MOVQ  CX, R10
11845  	ANDL  $0x0000001f, R10
11846  	MOVQ  $0x00000040, R12
11847  	SUBQ  R10, R12
11848  	DECQ  R11
11849  	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
11850  	LEAQ  -32(R9)(R12*1), R10
11851  	LEAQ  -32(CX)(R12*1), R13
11852  
11853  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
11854  	MOVOU (R10), X4
11855  	MOVOU 16(R10), X5
11856  	MOVOA X4, (R13)
11857  	MOVOA X5, 16(R13)
11858  	ADDQ  $0x20, R13
11859  	ADDQ  $0x20, R10
11860  	ADDQ  $0x20, R12
11861  	DECQ  R11
11862  	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
11863  
11864  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
11865  	MOVOU -32(R9)(R12*1), X4
11866  	MOVOU -16(R9)(R12*1), X5
11867  	MOVOA X4, -32(CX)(R12*1)
11868  	MOVOA X5, -16(CX)(R12*1)
11869  	ADDQ  $0x20, R12
11870  	CMPQ  R8, R12
11871  	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
11872  	MOVOU X0, (CX)
11873  	MOVOU X1, 16(CX)
11874  	MOVOU X2, -32(CX)(R8*1)
11875  	MOVOU X3, -16(CX)(R8*1)
11876  	MOVQ  SI, CX
11877  
11878  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
11879  	ADDL $0x05, DX
11880  	MOVL DX, SI
11881  	SUBL 16(SP), SI
11882  	MOVQ src_len+32(FP), R8
11883  	SUBL DX, R8
11884  	LEAQ (BX)(DX*1), R9
11885  	LEAQ (BX)(SI*1), SI
11886  
11887  	// matchLen
11888  	XORL R11, R11
11889  
11890  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K:
11891  	CMPL R8, $0x10
11892  	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K
11893  	MOVQ (R9)(R11*1), R10
11894  	MOVQ 8(R9)(R11*1), R12
11895  	XORQ (SI)(R11*1), R10
11896  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
11897  	XORQ 8(SI)(R11*1), R12
11898  	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K
11899  	LEAL -16(R8), R8
11900  	LEAL 16(R11), R11
11901  	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K
11902  
11903  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K:
11904  #ifdef GOAMD64_v3
11905  	TZCNTQ R12, R12
11906  
11907  #else
11908  	BSFQ R12, R12
11909  
11910  #endif
11911  	SARQ $0x03, R12
11912  	LEAL 8(R11)(R12*1), R11
11913  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
11914  
11915  matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K:
11916  	CMPL R8, $0x08
11917  	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
11918  	MOVQ (R9)(R11*1), R10
11919  	XORQ (SI)(R11*1), R10
11920  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
11921  	LEAL -8(R8), R8
11922  	LEAL 8(R11), R11
11923  	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
11924  
11925  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K:
11926  #ifdef GOAMD64_v3
11927  	TZCNTQ R10, R10
11928  
11929  #else
11930  	BSFQ R10, R10
11931  
11932  #endif
11933  	SARQ $0x03, R10
11934  	LEAL (R11)(R10*1), R11
11935  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
11936  
11937  matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
11938  	CMPL R8, $0x04
11939  	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
11940  	MOVL (R9)(R11*1), R10
11941  	CMPL (SI)(R11*1), R10
11942  	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
11943  	LEAL -4(R8), R8
11944  	LEAL 4(R11), R11
11945  
11946  matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
11947  	CMPL R8, $0x01
11948  	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
11949  	JB   repeat_extend_forward_end_encodeSnappyBlockAsm64K
11950  	MOVW (R9)(R11*1), R10
11951  	CMPW (SI)(R11*1), R10
11952  	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
11953  	LEAL 2(R11), R11
11954  	SUBL $0x02, R8
11955  	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm64K
11956  
11957  matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
11958  	MOVB (R9)(R11*1), R10
11959  	CMPB (SI)(R11*1), R10
11960  	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm64K
11961  	LEAL 1(R11), R11
11962  
11963  repeat_extend_forward_end_encodeSnappyBlockAsm64K:
11964  	ADDL R11, DX
11965  	MOVL DX, SI
11966  	SUBL DI, SI
11967  	MOVL 16(SP), DI
11968  
11969  	// emitCopy
11970  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
11971  	CMPL SI, $0x40
11972  	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
11973  	MOVB $0xee, (CX)
11974  	MOVW DI, 1(CX)
11975  	LEAL -60(SI), SI
11976  	ADDQ $0x03, CX
11977  	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
11978  
11979  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
11980  	MOVL SI, R8
11981  	SHLL $0x02, R8
11982  	CMPL SI, $0x0c
11983  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
11984  	CMPL DI, $0x00000800
11985  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
11986  	LEAL -15(R8), R8
11987  	MOVB DI, 1(CX)
11988  	SHRL $0x08, DI
11989  	SHLL $0x05, DI
11990  	ORL  DI, R8
11991  	MOVB R8, (CX)
11992  	ADDQ $0x02, CX
11993  	JMP  repeat_end_emit_encodeSnappyBlockAsm64K
11994  
11995  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
11996  	LEAL -2(R8), R8
11997  	MOVB R8, (CX)
11998  	MOVW DI, 1(CX)
11999  	ADDQ $0x03, CX
12000  
12001  repeat_end_emit_encodeSnappyBlockAsm64K:
12002  	MOVL DX, 12(SP)
12003  	JMP  search_loop_encodeSnappyBlockAsm64K
12004  
12005  no_repeat_found_encodeSnappyBlockAsm64K:
12006  	CMPL (BX)(SI*1), DI
12007  	JEQ  candidate_match_encodeSnappyBlockAsm64K
12008  	SHRQ $0x08, DI
12009  	MOVL (AX)(R10*4), SI
12010  	LEAL 2(DX), R9
12011  	CMPL (BX)(R8*1), DI
12012  	JEQ  candidate2_match_encodeSnappyBlockAsm64K
12013  	MOVL R9, (AX)(R10*4)
12014  	SHRQ $0x08, DI
12015  	CMPL (BX)(SI*1), DI
12016  	JEQ  candidate3_match_encodeSnappyBlockAsm64K
12017  	MOVL 20(SP), DX
12018  	JMP  search_loop_encodeSnappyBlockAsm64K
12019  
12020  candidate3_match_encodeSnappyBlockAsm64K:
12021  	ADDL $0x02, DX
12022  	JMP  candidate_match_encodeSnappyBlockAsm64K
12023  
12024  candidate2_match_encodeSnappyBlockAsm64K:
12025  	MOVL R9, (AX)(R10*4)
12026  	INCL DX
12027  	MOVL R8, SI
12028  
12029  candidate_match_encodeSnappyBlockAsm64K:
12030  	MOVL  12(SP), DI
12031  	TESTL SI, SI
12032  	JZ    match_extend_back_end_encodeSnappyBlockAsm64K
12033  
12034  match_extend_back_loop_encodeSnappyBlockAsm64K:
12035  	CMPL DX, DI
12036  	JBE  match_extend_back_end_encodeSnappyBlockAsm64K
12037  	MOVB -1(BX)(SI*1), R8
12038  	MOVB -1(BX)(DX*1), R9
12039  	CMPB R8, R9
12040  	JNE  match_extend_back_end_encodeSnappyBlockAsm64K
12041  	LEAL -1(DX), DX
12042  	DECL SI
12043  	JZ   match_extend_back_end_encodeSnappyBlockAsm64K
12044  	JMP  match_extend_back_loop_encodeSnappyBlockAsm64K
12045  
12046  match_extend_back_end_encodeSnappyBlockAsm64K:
12047  	MOVL DX, DI
12048  	SUBL 12(SP), DI
12049  	LEAQ 3(CX)(DI*1), DI
12050  	CMPQ DI, (SP)
12051  	JB   match_dst_size_check_encodeSnappyBlockAsm64K
12052  	MOVQ $0x00000000, ret+56(FP)
12053  	RET
12054  
12055  match_dst_size_check_encodeSnappyBlockAsm64K:
12056  	MOVL DX, DI
12057  	MOVL 12(SP), R8
12058  	CMPL R8, DI
12059  	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
12060  	MOVL DI, R9
12061  	MOVL DI, 12(SP)
12062  	LEAQ (BX)(R8*1), DI
12063  	SUBL R8, R9
12064  	LEAL -1(R9), R8
12065  	CMPL R8, $0x3c
12066  	JB   one_byte_match_emit_encodeSnappyBlockAsm64K
12067  	CMPL R8, $0x00000100
12068  	JB   two_bytes_match_emit_encodeSnappyBlockAsm64K
12069  	JB   three_bytes_match_emit_encodeSnappyBlockAsm64K
12070  
12071  three_bytes_match_emit_encodeSnappyBlockAsm64K:
12072  	MOVB $0xf4, (CX)
12073  	MOVW R8, 1(CX)
12074  	ADDQ $0x03, CX
12075  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
12076  
12077  two_bytes_match_emit_encodeSnappyBlockAsm64K:
12078  	MOVB $0xf0, (CX)
12079  	MOVB R8, 1(CX)
12080  	ADDQ $0x02, CX
12081  	CMPL R8, $0x40
12082  	JB   memmove_match_emit_encodeSnappyBlockAsm64K
12083  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
12084  
12085  one_byte_match_emit_encodeSnappyBlockAsm64K:
12086  	SHLB $0x02, R8
12087  	MOVB R8, (CX)
12088  	ADDQ $0x01, CX
12089  
12090  memmove_match_emit_encodeSnappyBlockAsm64K:
12091  	LEAQ (CX)(R9*1), R8
12092  
12093  	// genMemMoveShort
12094  	CMPQ R9, $0x08
12095  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
12096  	CMPQ R9, $0x10
12097  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
12098  	CMPQ R9, $0x20
12099  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
12100  	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
12101  
12102  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
12103  	MOVQ (DI), R10
12104  	MOVQ R10, (CX)
12105  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
12106  
12107  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
12108  	MOVQ (DI), R10
12109  	MOVQ -8(DI)(R9*1), DI
12110  	MOVQ R10, (CX)
12111  	MOVQ DI, -8(CX)(R9*1)
12112  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
12113  
12114  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
12115  	MOVOU (DI), X0
12116  	MOVOU -16(DI)(R9*1), X1
12117  	MOVOU X0, (CX)
12118  	MOVOU X1, -16(CX)(R9*1)
12119  	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
12120  
12121  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
12122  	MOVOU (DI), X0
12123  	MOVOU 16(DI), X1
12124  	MOVOU -32(DI)(R9*1), X2
12125  	MOVOU -16(DI)(R9*1), X3
12126  	MOVOU X0, (CX)
12127  	MOVOU X1, 16(CX)
12128  	MOVOU X2, -32(CX)(R9*1)
12129  	MOVOU X3, -16(CX)(R9*1)
12130  
12131  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
12132  	MOVQ R8, CX
12133  	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
12134  
12135  memmove_long_match_emit_encodeSnappyBlockAsm64K:
12136  	LEAQ (CX)(R9*1), R8
12137  
12138  	// genMemMoveLong
12139  	MOVOU (DI), X0
12140  	MOVOU 16(DI), X1
12141  	MOVOU -32(DI)(R9*1), X2
12142  	MOVOU -16(DI)(R9*1), X3
12143  	MOVQ  R9, R11
12144  	SHRQ  $0x05, R11
12145  	MOVQ  CX, R10
12146  	ANDL  $0x0000001f, R10
12147  	MOVQ  $0x00000040, R12
12148  	SUBQ  R10, R12
12149  	DECQ  R11
12150  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
12151  	LEAQ  -32(DI)(R12*1), R10
12152  	LEAQ  -32(CX)(R12*1), R13
12153  
12154  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
12155  	MOVOU (R10), X4
12156  	MOVOU 16(R10), X5
12157  	MOVOA X4, (R13)
12158  	MOVOA X5, 16(R13)
12159  	ADDQ  $0x20, R13
12160  	ADDQ  $0x20, R10
12161  	ADDQ  $0x20, R12
12162  	DECQ  R11
12163  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
12164  
12165  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
12166  	MOVOU -32(DI)(R12*1), X4
12167  	MOVOU -16(DI)(R12*1), X5
12168  	MOVOA X4, -32(CX)(R12*1)
12169  	MOVOA X5, -16(CX)(R12*1)
12170  	ADDQ  $0x20, R12
12171  	CMPQ  R9, R12
12172  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
12173  	MOVOU X0, (CX)
12174  	MOVOU X1, 16(CX)
12175  	MOVOU X2, -32(CX)(R9*1)
12176  	MOVOU X3, -16(CX)(R9*1)
12177  	MOVQ  R8, CX
12178  
12179  emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
12180  match_nolit_loop_encodeSnappyBlockAsm64K:
12181  	MOVL DX, DI
12182  	SUBL SI, DI
12183  	MOVL DI, 16(SP)
12184  	ADDL $0x04, DX
12185  	ADDL $0x04, SI
12186  	MOVQ src_len+32(FP), DI
12187  	SUBL DX, DI
12188  	LEAQ (BX)(DX*1), R8
12189  	LEAQ (BX)(SI*1), SI
12190  
12191  	// matchLen
12192  	XORL R10, R10
12193  
12194  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K:
12195  	CMPL DI, $0x10
12196  	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm64K
12197  	MOVQ (R8)(R10*1), R9
12198  	MOVQ 8(R8)(R10*1), R11
12199  	XORQ (SI)(R10*1), R9
12200  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
12201  	XORQ 8(SI)(R10*1), R11
12202  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K
12203  	LEAL -16(DI), DI
12204  	LEAL 16(R10), R10
12205  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K
12206  
12207  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K:
12208  #ifdef GOAMD64_v3
12209  	TZCNTQ R11, R11
12210  
12211  #else
12212  	BSFQ R11, R11
12213  
12214  #endif
12215  	SARQ $0x03, R11
12216  	LEAL 8(R10)(R11*1), R10
12217  	JMP  match_nolit_end_encodeSnappyBlockAsm64K
12218  
12219  matchlen_match8_match_nolit_encodeSnappyBlockAsm64K:
12220  	CMPL DI, $0x08
12221  	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
12222  	MOVQ (R8)(R10*1), R9
12223  	XORQ (SI)(R10*1), R9
12224  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
12225  	LEAL -8(DI), DI
12226  	LEAL 8(R10), R10
12227  	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
12228  
12229  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K:
12230  #ifdef GOAMD64_v3
12231  	TZCNTQ R9, R9
12232  
12233  #else
12234  	BSFQ R9, R9
12235  
12236  #endif
12237  	SARQ $0x03, R9
12238  	LEAL (R10)(R9*1), R10
12239  	JMP  match_nolit_end_encodeSnappyBlockAsm64K
12240  
12241  matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
12242  	CMPL DI, $0x04
12243  	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
12244  	MOVL (R8)(R10*1), R9
12245  	CMPL (SI)(R10*1), R9
12246  	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
12247  	LEAL -4(DI), DI
12248  	LEAL 4(R10), R10
12249  
12250  matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
12251  	CMPL DI, $0x01
12252  	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
12253  	JB   match_nolit_end_encodeSnappyBlockAsm64K
12254  	MOVW (R8)(R10*1), R9
12255  	CMPW (SI)(R10*1), R9
12256  	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
12257  	LEAL 2(R10), R10
12258  	SUBL $0x02, DI
12259  	JZ   match_nolit_end_encodeSnappyBlockAsm64K
12260  
12261  matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
12262  	MOVB (R8)(R10*1), R9
12263  	CMPB (SI)(R10*1), R9
12264  	JNE  match_nolit_end_encodeSnappyBlockAsm64K
12265  	LEAL 1(R10), R10
12266  
12267  match_nolit_end_encodeSnappyBlockAsm64K:
12268  	ADDL R10, DX
12269  	MOVL 16(SP), SI
12270  	ADDL $0x04, R10
12271  	MOVL DX, 12(SP)
12272  
12273  	// emitCopy
12274  two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
12275  	CMPL R10, $0x40
12276  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
12277  	MOVB $0xee, (CX)
12278  	MOVW SI, 1(CX)
12279  	LEAL -60(R10), R10
12280  	ADDQ $0x03, CX
12281  	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
12282  
12283  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
12284  	MOVL R10, DI
12285  	SHLL $0x02, DI
12286  	CMPL R10, $0x0c
12287  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
12288  	CMPL SI, $0x00000800
12289  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
12290  	LEAL -15(DI), DI
12291  	MOVB SI, 1(CX)
12292  	SHRL $0x08, SI
12293  	SHLL $0x05, SI
12294  	ORL  SI, DI
12295  	MOVB DI, (CX)
12296  	ADDQ $0x02, CX
12297  	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
12298  
12299  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
12300  	LEAL -2(DI), DI
12301  	MOVB DI, (CX)
12302  	MOVW SI, 1(CX)
12303  	ADDQ $0x03, CX
12304  
12305  match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
12306  	CMPL DX, 8(SP)
12307  	JAE  emit_remainder_encodeSnappyBlockAsm64K
12308  	MOVQ -2(BX)(DX*1), DI
12309  	CMPQ CX, (SP)
12310  	JB   match_nolit_dst_ok_encodeSnappyBlockAsm64K
12311  	MOVQ $0x00000000, ret+56(FP)
12312  	RET
12313  
12314  match_nolit_dst_ok_encodeSnappyBlockAsm64K:
12315  	MOVQ  $0x0000cf1bbcdcbf9b, R9
12316  	MOVQ  DI, R8
12317  	SHRQ  $0x10, DI
12318  	MOVQ  DI, SI
12319  	SHLQ  $0x10, R8
12320  	IMULQ R9, R8
12321  	SHRQ  $0x32, R8
12322  	SHLQ  $0x10, SI
12323  	IMULQ R9, SI
12324  	SHRQ  $0x32, SI
12325  	LEAL  -2(DX), R9
12326  	LEAQ  (AX)(SI*4), R10
12327  	MOVL  (R10), SI
12328  	MOVL  R9, (AX)(R8*4)
12329  	MOVL  DX, (R10)
12330  	CMPL  (BX)(SI*1), DI
12331  	JEQ   match_nolit_loop_encodeSnappyBlockAsm64K
12332  	INCL  DX
12333  	JMP   search_loop_encodeSnappyBlockAsm64K
12334  
12335  emit_remainder_encodeSnappyBlockAsm64K:
12336  	MOVQ src_len+32(FP), AX
12337  	SUBL 12(SP), AX
12338  	LEAQ 3(CX)(AX*1), AX
12339  	CMPQ AX, (SP)
12340  	JB   emit_remainder_ok_encodeSnappyBlockAsm64K
12341  	MOVQ $0x00000000, ret+56(FP)
12342  	RET
12343  
12344  emit_remainder_ok_encodeSnappyBlockAsm64K:
12345  	MOVQ src_len+32(FP), AX
12346  	MOVL 12(SP), DX
12347  	CMPL DX, AX
12348  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
12349  	MOVL AX, SI
12350  	MOVL AX, 12(SP)
12351  	LEAQ (BX)(DX*1), AX
12352  	SUBL DX, SI
12353  	LEAL -1(SI), DX
12354  	CMPL DX, $0x3c
12355  	JB   one_byte_emit_remainder_encodeSnappyBlockAsm64K
12356  	CMPL DX, $0x00000100
12357  	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm64K
12358  	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm64K
12359  
12360  three_bytes_emit_remainder_encodeSnappyBlockAsm64K:
12361  	MOVB $0xf4, (CX)
12362  	MOVW DX, 1(CX)
12363  	ADDQ $0x03, CX
12364  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
12365  
12366  two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
12367  	MOVB $0xf0, (CX)
12368  	MOVB DL, 1(CX)
12369  	ADDQ $0x02, CX
12370  	CMPL DX, $0x40
12371  	JB   memmove_emit_remainder_encodeSnappyBlockAsm64K
12372  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
12373  
12374  one_byte_emit_remainder_encodeSnappyBlockAsm64K:
12375  	SHLB $0x02, DL
12376  	MOVB DL, (CX)
12377  	ADDQ $0x01, CX
12378  
12379  memmove_emit_remainder_encodeSnappyBlockAsm64K:
12380  	LEAQ (CX)(SI*1), DX
12381  	MOVL SI, BX
12382  
12383  	// genMemMoveShort
12384  	CMPQ BX, $0x03
12385  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2
12386  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3
12387  	CMPQ BX, $0x08
12388  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7
12389  	CMPQ BX, $0x10
12390  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
12391  	CMPQ BX, $0x20
12392  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
12393  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
12394  
12395  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2:
12396  	MOVB (AX), SI
12397  	MOVB -1(AX)(BX*1), AL
12398  	MOVB SI, (CX)
12399  	MOVB AL, -1(CX)(BX*1)
12400  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
12401  
12402  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3:
12403  	MOVW (AX), SI
12404  	MOVB 2(AX), AL
12405  	MOVW SI, (CX)
12406  	MOVB AL, 2(CX)
12407  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
12408  
12409  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
12410  	MOVL (AX), SI
12411  	MOVL -4(AX)(BX*1), AX
12412  	MOVL SI, (CX)
12413  	MOVL AX, -4(CX)(BX*1)
12414  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
12415  
12416  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
12417  	MOVQ (AX), SI
12418  	MOVQ -8(AX)(BX*1), AX
12419  	MOVQ SI, (CX)
12420  	MOVQ AX, -8(CX)(BX*1)
12421  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
12422  
12423  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
12424  	MOVOU (AX), X0
12425  	MOVOU -16(AX)(BX*1), X1
12426  	MOVOU X0, (CX)
12427  	MOVOU X1, -16(CX)(BX*1)
12428  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
12429  
12430  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
12431  	MOVOU (AX), X0
12432  	MOVOU 16(AX), X1
12433  	MOVOU -32(AX)(BX*1), X2
12434  	MOVOU -16(AX)(BX*1), X3
12435  	MOVOU X0, (CX)
12436  	MOVOU X1, 16(CX)
12437  	MOVOU X2, -32(CX)(BX*1)
12438  	MOVOU X3, -16(CX)(BX*1)
12439  
12440  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
12441  	MOVQ DX, CX
12442  	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
12443  
12444  memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
12445  	LEAQ (CX)(SI*1), DX
12446  	MOVL SI, BX
12447  
12448  	// genMemMoveLong
12449  	MOVOU (AX), X0
12450  	MOVOU 16(AX), X1
12451  	MOVOU -32(AX)(BX*1), X2
12452  	MOVOU -16(AX)(BX*1), X3
12453  	MOVQ  BX, DI
12454  	SHRQ  $0x05, DI
12455  	MOVQ  CX, SI
12456  	ANDL  $0x0000001f, SI
12457  	MOVQ  $0x00000040, R8
12458  	SUBQ  SI, R8
12459  	DECQ  DI
12460  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
12461  	LEAQ  -32(AX)(R8*1), SI
12462  	LEAQ  -32(CX)(R8*1), R9
12463  
12464  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
12465  	MOVOU (SI), X4
12466  	MOVOU 16(SI), X5
12467  	MOVOA X4, (R9)
12468  	MOVOA X5, 16(R9)
12469  	ADDQ  $0x20, R9
12470  	ADDQ  $0x20, SI
12471  	ADDQ  $0x20, R8
12472  	DECQ  DI
12473  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
12474  
12475  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
12476  	MOVOU -32(AX)(R8*1), X4
12477  	MOVOU -16(AX)(R8*1), X5
12478  	MOVOA X4, -32(CX)(R8*1)
12479  	MOVOA X5, -16(CX)(R8*1)
12480  	ADDQ  $0x20, R8
12481  	CMPQ  BX, R8
12482  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
12483  	MOVOU X0, (CX)
12484  	MOVOU X1, 16(CX)
12485  	MOVOU X2, -32(CX)(BX*1)
12486  	MOVOU X3, -16(CX)(BX*1)
12487  	MOVQ  DX, CX
12488  
12489  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
12490  	MOVQ dst_base+0(FP), AX
12491  	SUBQ AX, CX
12492  	MOVQ CX, ret+56(FP)
12493  	RET
12494  
12495  // func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
12496  // Requires: BMI, SSE2
12497  TEXT ·encodeSnappyBlockAsm12B(SB), $24-64
12498  	MOVQ tmp+48(FP), AX
12499  	MOVQ dst_base+0(FP), CX
12500  	MOVQ $0x00000080, DX
12501  	MOVQ AX, BX
12502  	PXOR X0, X0
12503  
12504  zero_loop_encodeSnappyBlockAsm12B:
12505  	MOVOU X0, (BX)
12506  	MOVOU X0, 16(BX)
12507  	MOVOU X0, 32(BX)
12508  	MOVOU X0, 48(BX)
12509  	MOVOU X0, 64(BX)
12510  	MOVOU X0, 80(BX)
12511  	MOVOU X0, 96(BX)
12512  	MOVOU X0, 112(BX)
12513  	ADDQ  $0x80, BX
12514  	DECQ  DX
12515  	JNZ   zero_loop_encodeSnappyBlockAsm12B
12516  	MOVL  $0x00000000, 12(SP)
12517  	MOVQ  src_len+32(FP), DX
12518  	LEAQ  -9(DX), BX
12519  	LEAQ  -8(DX), SI
12520  	MOVL  SI, 8(SP)
12521  	SHRQ  $0x05, DX
12522  	SUBL  DX, BX
12523  	LEAQ  (CX)(BX*1), BX
12524  	MOVQ  BX, (SP)
12525  	MOVL  $0x00000001, DX
12526  	MOVL  DX, 16(SP)
12527  	MOVQ  src_base+24(FP), BX
12528  
12529  search_loop_encodeSnappyBlockAsm12B:
12530  	MOVL  DX, SI
12531  	SUBL  12(SP), SI
12532  	SHRL  $0x05, SI
12533  	LEAL  4(DX)(SI*1), SI
12534  	CMPL  SI, 8(SP)
12535  	JAE   emit_remainder_encodeSnappyBlockAsm12B
12536  	MOVQ  (BX)(DX*1), DI
12537  	MOVL  SI, 20(SP)
12538  	MOVQ  $0x000000cf1bbcdcbb, R9
12539  	MOVQ  DI, R10
12540  	MOVQ  DI, R11
12541  	SHRQ  $0x08, R11
12542  	SHLQ  $0x18, R10
12543  	IMULQ R9, R10
12544  	SHRQ  $0x34, R10
12545  	SHLQ  $0x18, R11
12546  	IMULQ R9, R11
12547  	SHRQ  $0x34, R11
12548  	MOVL  (AX)(R10*4), SI
12549  	MOVL  (AX)(R11*4), R8
12550  	MOVL  DX, (AX)(R10*4)
12551  	LEAL  1(DX), R10
12552  	MOVL  R10, (AX)(R11*4)
12553  	MOVQ  DI, R10
12554  	SHRQ  $0x10, R10
12555  	SHLQ  $0x18, R10
12556  	IMULQ R9, R10
12557  	SHRQ  $0x34, R10
12558  	MOVL  DX, R9
12559  	SUBL  16(SP), R9
12560  	MOVL  1(BX)(R9*1), R11
12561  	MOVQ  DI, R9
12562  	SHRQ  $0x08, R9
12563  	CMPL  R9, R11
12564  	JNE   no_repeat_found_encodeSnappyBlockAsm12B
12565  	LEAL  1(DX), DI
12566  	MOVL  12(SP), SI
12567  	MOVL  DI, R8
12568  	SUBL  16(SP), R8
12569  	JZ    repeat_extend_back_end_encodeSnappyBlockAsm12B
12570  
12571  repeat_extend_back_loop_encodeSnappyBlockAsm12B:
12572  	CMPL DI, SI
12573  	JBE  repeat_extend_back_end_encodeSnappyBlockAsm12B
12574  	MOVB -1(BX)(R8*1), R9
12575  	MOVB -1(BX)(DI*1), R10
12576  	CMPB R9, R10
12577  	JNE  repeat_extend_back_end_encodeSnappyBlockAsm12B
12578  	LEAL -1(DI), DI
12579  	DECL R8
12580  	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm12B
12581  
12582  repeat_extend_back_end_encodeSnappyBlockAsm12B:
12583  	MOVL DI, SI
12584  	SUBL 12(SP), SI
12585  	LEAQ 3(CX)(SI*1), SI
12586  	CMPQ SI, (SP)
12587  	JB   repeat_dst_size_check_encodeSnappyBlockAsm12B
12588  	MOVQ $0x00000000, ret+56(FP)
12589  	RET
12590  
12591  repeat_dst_size_check_encodeSnappyBlockAsm12B:
12592  	MOVL 12(SP), SI
12593  	CMPL SI, DI
12594  	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
12595  	MOVL DI, R8
12596  	MOVL DI, 12(SP)
12597  	LEAQ (BX)(SI*1), R9
12598  	SUBL SI, R8
12599  	LEAL -1(R8), SI
12600  	CMPL SI, $0x3c
12601  	JB   one_byte_repeat_emit_encodeSnappyBlockAsm12B
12602  	CMPL SI, $0x00000100
12603  	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm12B
12604  	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm12B
12605  
12606  three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
12607  	MOVB $0xf4, (CX)
12608  	MOVW SI, 1(CX)
12609  	ADDQ $0x03, CX
12610  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
12611  
12612  two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
12613  	MOVB $0xf0, (CX)
12614  	MOVB SI, 1(CX)
12615  	ADDQ $0x02, CX
12616  	CMPL SI, $0x40
12617  	JB   memmove_repeat_emit_encodeSnappyBlockAsm12B
12618  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
12619  
12620  one_byte_repeat_emit_encodeSnappyBlockAsm12B:
12621  	SHLB $0x02, SI
12622  	MOVB SI, (CX)
12623  	ADDQ $0x01, CX
12624  
12625  memmove_repeat_emit_encodeSnappyBlockAsm12B:
12626  	LEAQ (CX)(R8*1), SI
12627  
12628  	// genMemMoveShort
12629  	CMPQ R8, $0x08
12630  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
12631  	CMPQ R8, $0x10
12632  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
12633  	CMPQ R8, $0x20
12634  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
12635  	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
12636  
12637  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
12638  	MOVQ (R9), R10
12639  	MOVQ R10, (CX)
12640  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
12641  
12642  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
12643  	MOVQ (R9), R10
12644  	MOVQ -8(R9)(R8*1), R9
12645  	MOVQ R10, (CX)
12646  	MOVQ R9, -8(CX)(R8*1)
12647  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
12648  
12649  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
12650  	MOVOU (R9), X0
12651  	MOVOU -16(R9)(R8*1), X1
12652  	MOVOU X0, (CX)
12653  	MOVOU X1, -16(CX)(R8*1)
12654  	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
12655  
12656  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
12657  	MOVOU (R9), X0
12658  	MOVOU 16(R9), X1
12659  	MOVOU -32(R9)(R8*1), X2
12660  	MOVOU -16(R9)(R8*1), X3
12661  	MOVOU X0, (CX)
12662  	MOVOU X1, 16(CX)
12663  	MOVOU X2, -32(CX)(R8*1)
12664  	MOVOU X3, -16(CX)(R8*1)
12665  
12666  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
12667  	MOVQ SI, CX
12668  	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
12669  
12670  memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
12671  	LEAQ (CX)(R8*1), SI
12672  
12673  	// genMemMoveLong
12674  	MOVOU (R9), X0
12675  	MOVOU 16(R9), X1
12676  	MOVOU -32(R9)(R8*1), X2
12677  	MOVOU -16(R9)(R8*1), X3
12678  	MOVQ  R8, R11
12679  	SHRQ  $0x05, R11
12680  	MOVQ  CX, R10
12681  	ANDL  $0x0000001f, R10
12682  	MOVQ  $0x00000040, R12
12683  	SUBQ  R10, R12
12684  	DECQ  R11
12685  	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
12686  	LEAQ  -32(R9)(R12*1), R10
12687  	LEAQ  -32(CX)(R12*1), R13
12688  
12689  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
12690  	MOVOU (R10), X4
12691  	MOVOU 16(R10), X5
12692  	MOVOA X4, (R13)
12693  	MOVOA X5, 16(R13)
12694  	ADDQ  $0x20, R13
12695  	ADDQ  $0x20, R10
12696  	ADDQ  $0x20, R12
12697  	DECQ  R11
12698  	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
12699  
12700  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
12701  	MOVOU -32(R9)(R12*1), X4
12702  	MOVOU -16(R9)(R12*1), X5
12703  	MOVOA X4, -32(CX)(R12*1)
12704  	MOVOA X5, -16(CX)(R12*1)
12705  	ADDQ  $0x20, R12
12706  	CMPQ  R8, R12
12707  	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
12708  	MOVOU X0, (CX)
12709  	MOVOU X1, 16(CX)
12710  	MOVOU X2, -32(CX)(R8*1)
12711  	MOVOU X3, -16(CX)(R8*1)
12712  	MOVQ  SI, CX
12713  
12714  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
12715  	ADDL $0x05, DX
12716  	MOVL DX, SI
12717  	SUBL 16(SP), SI
12718  	MOVQ src_len+32(FP), R8
12719  	SUBL DX, R8
12720  	LEAQ (BX)(DX*1), R9
12721  	LEAQ (BX)(SI*1), SI
12722  
12723  	// matchLen
12724  	XORL R11, R11
12725  
12726  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B:
12727  	CMPL R8, $0x10
12728  	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B
12729  	MOVQ (R9)(R11*1), R10
12730  	MOVQ 8(R9)(R11*1), R12
12731  	XORQ (SI)(R11*1), R10
12732  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
12733  	XORQ 8(SI)(R11*1), R12
12734  	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B
12735  	LEAL -16(R8), R8
12736  	LEAL 16(R11), R11
12737  	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B
12738  
12739  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B:
12740  #ifdef GOAMD64_v3
12741  	TZCNTQ R12, R12
12742  
12743  #else
12744  	BSFQ R12, R12
12745  
12746  #endif
12747  	SARQ $0x03, R12
12748  	LEAL 8(R11)(R12*1), R11
12749  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
12750  
12751  matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B:
12752  	CMPL R8, $0x08
12753  	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
12754  	MOVQ (R9)(R11*1), R10
12755  	XORQ (SI)(R11*1), R10
12756  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
12757  	LEAL -8(R8), R8
12758  	LEAL 8(R11), R11
12759  	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
12760  
12761  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B:
12762  #ifdef GOAMD64_v3
12763  	TZCNTQ R10, R10
12764  
12765  #else
12766  	BSFQ R10, R10
12767  
12768  #endif
12769  	SARQ $0x03, R10
12770  	LEAL (R11)(R10*1), R11
12771  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
12772  
12773  matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
12774  	CMPL R8, $0x04
12775  	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
12776  	MOVL (R9)(R11*1), R10
12777  	CMPL (SI)(R11*1), R10
12778  	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
12779  	LEAL -4(R8), R8
12780  	LEAL 4(R11), R11
12781  
12782  matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
12783  	CMPL R8, $0x01
12784  	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
12785  	JB   repeat_extend_forward_end_encodeSnappyBlockAsm12B
12786  	MOVW (R9)(R11*1), R10
12787  	CMPW (SI)(R11*1), R10
12788  	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
12789  	LEAL 2(R11), R11
12790  	SUBL $0x02, R8
12791  	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm12B
12792  
12793  matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
12794  	MOVB (R9)(R11*1), R10
12795  	CMPB (SI)(R11*1), R10
12796  	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm12B
12797  	LEAL 1(R11), R11
12798  
12799  repeat_extend_forward_end_encodeSnappyBlockAsm12B:
12800  	ADDL R11, DX
12801  	MOVL DX, SI
12802  	SUBL DI, SI
12803  	MOVL 16(SP), DI
12804  
12805  	// emitCopy
12806  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
12807  	CMPL SI, $0x40
12808  	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
12809  	MOVB $0xee, (CX)
12810  	MOVW DI, 1(CX)
12811  	LEAL -60(SI), SI
12812  	ADDQ $0x03, CX
12813  	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
12814  
12815  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
12816  	MOVL SI, R8
12817  	SHLL $0x02, R8
12818  	CMPL SI, $0x0c
12819  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
12820  	CMPL DI, $0x00000800
12821  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
12822  	LEAL -15(R8), R8
12823  	MOVB DI, 1(CX)
12824  	SHRL $0x08, DI
12825  	SHLL $0x05, DI
12826  	ORL  DI, R8
12827  	MOVB R8, (CX)
12828  	ADDQ $0x02, CX
12829  	JMP  repeat_end_emit_encodeSnappyBlockAsm12B
12830  
12831  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
12832  	LEAL -2(R8), R8
12833  	MOVB R8, (CX)
12834  	MOVW DI, 1(CX)
12835  	ADDQ $0x03, CX
12836  
12837  repeat_end_emit_encodeSnappyBlockAsm12B:
12838  	MOVL DX, 12(SP)
12839  	JMP  search_loop_encodeSnappyBlockAsm12B
12840  
12841  no_repeat_found_encodeSnappyBlockAsm12B:
12842  	CMPL (BX)(SI*1), DI
12843  	JEQ  candidate_match_encodeSnappyBlockAsm12B
12844  	SHRQ $0x08, DI
12845  	MOVL (AX)(R10*4), SI
12846  	LEAL 2(DX), R9
12847  	CMPL (BX)(R8*1), DI
12848  	JEQ  candidate2_match_encodeSnappyBlockAsm12B
12849  	MOVL R9, (AX)(R10*4)
12850  	SHRQ $0x08, DI
12851  	CMPL (BX)(SI*1), DI
12852  	JEQ  candidate3_match_encodeSnappyBlockAsm12B
12853  	MOVL 20(SP), DX
12854  	JMP  search_loop_encodeSnappyBlockAsm12B
12855  
12856  candidate3_match_encodeSnappyBlockAsm12B:
12857  	ADDL $0x02, DX
12858  	JMP  candidate_match_encodeSnappyBlockAsm12B
12859  
12860  candidate2_match_encodeSnappyBlockAsm12B:
12861  	MOVL R9, (AX)(R10*4)
12862  	INCL DX
12863  	MOVL R8, SI
12864  
12865  candidate_match_encodeSnappyBlockAsm12B:
12866  	MOVL  12(SP), DI
12867  	TESTL SI, SI
12868  	JZ    match_extend_back_end_encodeSnappyBlockAsm12B
12869  
12870  match_extend_back_loop_encodeSnappyBlockAsm12B:
12871  	CMPL DX, DI
12872  	JBE  match_extend_back_end_encodeSnappyBlockAsm12B
12873  	MOVB -1(BX)(SI*1), R8
12874  	MOVB -1(BX)(DX*1), R9
12875  	CMPB R8, R9
12876  	JNE  match_extend_back_end_encodeSnappyBlockAsm12B
12877  	LEAL -1(DX), DX
12878  	DECL SI
12879  	JZ   match_extend_back_end_encodeSnappyBlockAsm12B
12880  	JMP  match_extend_back_loop_encodeSnappyBlockAsm12B
12881  
12882  match_extend_back_end_encodeSnappyBlockAsm12B:
12883  	MOVL DX, DI
12884  	SUBL 12(SP), DI
12885  	LEAQ 3(CX)(DI*1), DI
12886  	CMPQ DI, (SP)
12887  	JB   match_dst_size_check_encodeSnappyBlockAsm12B
12888  	MOVQ $0x00000000, ret+56(FP)
12889  	RET
12890  
12891  match_dst_size_check_encodeSnappyBlockAsm12B:
12892  	MOVL DX, DI
12893  	MOVL 12(SP), R8
12894  	CMPL R8, DI
12895  	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
12896  	MOVL DI, R9
12897  	MOVL DI, 12(SP)
12898  	LEAQ (BX)(R8*1), DI
12899  	SUBL R8, R9
12900  	LEAL -1(R9), R8
12901  	CMPL R8, $0x3c
12902  	JB   one_byte_match_emit_encodeSnappyBlockAsm12B
12903  	CMPL R8, $0x00000100
12904  	JB   two_bytes_match_emit_encodeSnappyBlockAsm12B
12905  	JB   three_bytes_match_emit_encodeSnappyBlockAsm12B
12906  
12907  three_bytes_match_emit_encodeSnappyBlockAsm12B:
12908  	MOVB $0xf4, (CX)
12909  	MOVW R8, 1(CX)
12910  	ADDQ $0x03, CX
12911  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
12912  
12913  two_bytes_match_emit_encodeSnappyBlockAsm12B:
12914  	MOVB $0xf0, (CX)
12915  	MOVB R8, 1(CX)
12916  	ADDQ $0x02, CX
12917  	CMPL R8, $0x40
12918  	JB   memmove_match_emit_encodeSnappyBlockAsm12B
12919  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
12920  
12921  one_byte_match_emit_encodeSnappyBlockAsm12B:
12922  	SHLB $0x02, R8
12923  	MOVB R8, (CX)
12924  	ADDQ $0x01, CX
12925  
12926  memmove_match_emit_encodeSnappyBlockAsm12B:
12927  	LEAQ (CX)(R9*1), R8
12928  
12929  	// genMemMoveShort
12930  	CMPQ R9, $0x08
12931  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
12932  	CMPQ R9, $0x10
12933  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
12934  	CMPQ R9, $0x20
12935  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
12936  	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
12937  
12938  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
12939  	MOVQ (DI), R10
12940  	MOVQ R10, (CX)
12941  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
12942  
12943  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
12944  	MOVQ (DI), R10
12945  	MOVQ -8(DI)(R9*1), DI
12946  	MOVQ R10, (CX)
12947  	MOVQ DI, -8(CX)(R9*1)
12948  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
12949  
12950  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
12951  	MOVOU (DI), X0
12952  	MOVOU -16(DI)(R9*1), X1
12953  	MOVOU X0, (CX)
12954  	MOVOU X1, -16(CX)(R9*1)
12955  	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
12956  
12957  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
12958  	MOVOU (DI), X0
12959  	MOVOU 16(DI), X1
12960  	MOVOU -32(DI)(R9*1), X2
12961  	MOVOU -16(DI)(R9*1), X3
12962  	MOVOU X0, (CX)
12963  	MOVOU X1, 16(CX)
12964  	MOVOU X2, -32(CX)(R9*1)
12965  	MOVOU X3, -16(CX)(R9*1)
12966  
12967  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
12968  	MOVQ R8, CX
12969  	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
12970  
12971  memmove_long_match_emit_encodeSnappyBlockAsm12B:
12972  	LEAQ (CX)(R9*1), R8
12973  
12974  	// genMemMoveLong
12975  	MOVOU (DI), X0
12976  	MOVOU 16(DI), X1
12977  	MOVOU -32(DI)(R9*1), X2
12978  	MOVOU -16(DI)(R9*1), X3
12979  	MOVQ  R9, R11
12980  	SHRQ  $0x05, R11
12981  	MOVQ  CX, R10
12982  	ANDL  $0x0000001f, R10
12983  	MOVQ  $0x00000040, R12
12984  	SUBQ  R10, R12
12985  	DECQ  R11
12986  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
12987  	LEAQ  -32(DI)(R12*1), R10
12988  	LEAQ  -32(CX)(R12*1), R13
12989  
12990  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
12991  	MOVOU (R10), X4
12992  	MOVOU 16(R10), X5
12993  	MOVOA X4, (R13)
12994  	MOVOA X5, 16(R13)
12995  	ADDQ  $0x20, R13
12996  	ADDQ  $0x20, R10
12997  	ADDQ  $0x20, R12
12998  	DECQ  R11
12999  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
13000  
13001  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
13002  	MOVOU -32(DI)(R12*1), X4
13003  	MOVOU -16(DI)(R12*1), X5
13004  	MOVOA X4, -32(CX)(R12*1)
13005  	MOVOA X5, -16(CX)(R12*1)
13006  	ADDQ  $0x20, R12
13007  	CMPQ  R9, R12
13008  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
13009  	MOVOU X0, (CX)
13010  	MOVOU X1, 16(CX)
13011  	MOVOU X2, -32(CX)(R9*1)
13012  	MOVOU X3, -16(CX)(R9*1)
13013  	MOVQ  R8, CX
13014  
13015  emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
13016  match_nolit_loop_encodeSnappyBlockAsm12B:
13017  	MOVL DX, DI
13018  	SUBL SI, DI
13019  	MOVL DI, 16(SP)
13020  	ADDL $0x04, DX
13021  	ADDL $0x04, SI
13022  	MOVQ src_len+32(FP), DI
13023  	SUBL DX, DI
13024  	LEAQ (BX)(DX*1), R8
13025  	LEAQ (BX)(SI*1), SI
13026  
13027  	// matchLen
13028  	XORL R10, R10
13029  
13030  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B:
13031  	CMPL DI, $0x10
13032  	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm12B
13033  	MOVQ (R8)(R10*1), R9
13034  	MOVQ 8(R8)(R10*1), R11
13035  	XORQ (SI)(R10*1), R9
13036  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
13037  	XORQ 8(SI)(R10*1), R11
13038  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B
13039  	LEAL -16(DI), DI
13040  	LEAL 16(R10), R10
13041  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B
13042  
13043  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B:
13044  #ifdef GOAMD64_v3
13045  	TZCNTQ R11, R11
13046  
13047  #else
13048  	BSFQ R11, R11
13049  
13050  #endif
13051  	SARQ $0x03, R11
13052  	LEAL 8(R10)(R11*1), R10
13053  	JMP  match_nolit_end_encodeSnappyBlockAsm12B
13054  
13055  matchlen_match8_match_nolit_encodeSnappyBlockAsm12B:
13056  	CMPL DI, $0x08
13057  	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
13058  	MOVQ (R8)(R10*1), R9
13059  	XORQ (SI)(R10*1), R9
13060  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
13061  	LEAL -8(DI), DI
13062  	LEAL 8(R10), R10
13063  	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
13064  
13065  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B:
13066  #ifdef GOAMD64_v3
13067  	TZCNTQ R9, R9
13068  
13069  #else
13070  	BSFQ R9, R9
13071  
13072  #endif
13073  	SARQ $0x03, R9
13074  	LEAL (R10)(R9*1), R10
13075  	JMP  match_nolit_end_encodeSnappyBlockAsm12B
13076  
13077  matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
13078  	CMPL DI, $0x04
13079  	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
13080  	MOVL (R8)(R10*1), R9
13081  	CMPL (SI)(R10*1), R9
13082  	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
13083  	LEAL -4(DI), DI
13084  	LEAL 4(R10), R10
13085  
13086  matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
13087  	CMPL DI, $0x01
13088  	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
13089  	JB   match_nolit_end_encodeSnappyBlockAsm12B
13090  	MOVW (R8)(R10*1), R9
13091  	CMPW (SI)(R10*1), R9
13092  	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
13093  	LEAL 2(R10), R10
13094  	SUBL $0x02, DI
13095  	JZ   match_nolit_end_encodeSnappyBlockAsm12B
13096  
13097  matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
13098  	MOVB (R8)(R10*1), R9
13099  	CMPB (SI)(R10*1), R9
13100  	JNE  match_nolit_end_encodeSnappyBlockAsm12B
13101  	LEAL 1(R10), R10
13102  
13103  match_nolit_end_encodeSnappyBlockAsm12B:
13104  	ADDL R10, DX
13105  	MOVL 16(SP), SI
13106  	ADDL $0x04, R10
13107  	MOVL DX, 12(SP)
13108  
13109  	// emitCopy
13110  two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
13111  	CMPL R10, $0x40
13112  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
13113  	MOVB $0xee, (CX)
13114  	MOVW SI, 1(CX)
13115  	LEAL -60(R10), R10
13116  	ADDQ $0x03, CX
13117  	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
13118  
13119  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
13120  	MOVL R10, DI
13121  	SHLL $0x02, DI
13122  	CMPL R10, $0x0c
13123  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
13124  	CMPL SI, $0x00000800
13125  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
13126  	LEAL -15(DI), DI
13127  	MOVB SI, 1(CX)
13128  	SHRL $0x08, SI
13129  	SHLL $0x05, SI
13130  	ORL  SI, DI
13131  	MOVB DI, (CX)
13132  	ADDQ $0x02, CX
13133  	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
13134  
13135  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
13136  	LEAL -2(DI), DI
13137  	MOVB DI, (CX)
13138  	MOVW SI, 1(CX)
13139  	ADDQ $0x03, CX
13140  
13141  match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
13142  	CMPL DX, 8(SP)
13143  	JAE  emit_remainder_encodeSnappyBlockAsm12B
13144  	MOVQ -2(BX)(DX*1), DI
13145  	CMPQ CX, (SP)
13146  	JB   match_nolit_dst_ok_encodeSnappyBlockAsm12B
13147  	MOVQ $0x00000000, ret+56(FP)
13148  	RET
13149  
13150  match_nolit_dst_ok_encodeSnappyBlockAsm12B:
13151  	MOVQ  $0x000000cf1bbcdcbb, R9
13152  	MOVQ  DI, R8
13153  	SHRQ  $0x10, DI
13154  	MOVQ  DI, SI
13155  	SHLQ  $0x18, R8
13156  	IMULQ R9, R8
13157  	SHRQ  $0x34, R8
13158  	SHLQ  $0x18, SI
13159  	IMULQ R9, SI
13160  	SHRQ  $0x34, SI
13161  	LEAL  -2(DX), R9
13162  	LEAQ  (AX)(SI*4), R10
13163  	MOVL  (R10), SI
13164  	MOVL  R9, (AX)(R8*4)
13165  	MOVL  DX, (R10)
13166  	CMPL  (BX)(SI*1), DI
13167  	JEQ   match_nolit_loop_encodeSnappyBlockAsm12B
13168  	INCL  DX
13169  	JMP   search_loop_encodeSnappyBlockAsm12B
13170  
13171  emit_remainder_encodeSnappyBlockAsm12B:
13172  	MOVQ src_len+32(FP), AX
13173  	SUBL 12(SP), AX
13174  	LEAQ 3(CX)(AX*1), AX
13175  	CMPQ AX, (SP)
13176  	JB   emit_remainder_ok_encodeSnappyBlockAsm12B
13177  	MOVQ $0x00000000, ret+56(FP)
13178  	RET
13179  
13180  emit_remainder_ok_encodeSnappyBlockAsm12B:
13181  	MOVQ src_len+32(FP), AX
13182  	MOVL 12(SP), DX
13183  	CMPL DX, AX
13184  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
13185  	MOVL AX, SI
13186  	MOVL AX, 12(SP)
13187  	LEAQ (BX)(DX*1), AX
13188  	SUBL DX, SI
13189  	LEAL -1(SI), DX
13190  	CMPL DX, $0x3c
13191  	JB   one_byte_emit_remainder_encodeSnappyBlockAsm12B
13192  	CMPL DX, $0x00000100
13193  	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm12B
13194  	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm12B
13195  
13196  three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
13197  	MOVB $0xf4, (CX)
13198  	MOVW DX, 1(CX)
13199  	ADDQ $0x03, CX
13200  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
13201  
13202  two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
13203  	MOVB $0xf0, (CX)
13204  	MOVB DL, 1(CX)
13205  	ADDQ $0x02, CX
13206  	CMPL DX, $0x40
13207  	JB   memmove_emit_remainder_encodeSnappyBlockAsm12B
13208  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
13209  
13210  one_byte_emit_remainder_encodeSnappyBlockAsm12B:
13211  	SHLB $0x02, DL
13212  	MOVB DL, (CX)
13213  	ADDQ $0x01, CX
13214  
13215  memmove_emit_remainder_encodeSnappyBlockAsm12B:
13216  	LEAQ (CX)(SI*1), DX
13217  	MOVL SI, BX
13218  
13219  	// genMemMoveShort
13220  	CMPQ BX, $0x03
13221  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
13222  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
13223  	CMPQ BX, $0x08
13224  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7
13225  	CMPQ BX, $0x10
13226  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
13227  	CMPQ BX, $0x20
13228  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
13229  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
13230  
13231  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
13232  	MOVB (AX), SI
13233  	MOVB -1(AX)(BX*1), AL
13234  	MOVB SI, (CX)
13235  	MOVB AL, -1(CX)(BX*1)
13236  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
13237  
13238  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
13239  	MOVW (AX), SI
13240  	MOVB 2(AX), AL
13241  	MOVW SI, (CX)
13242  	MOVB AL, 2(CX)
13243  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
13244  
13245  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
13246  	MOVL (AX), SI
13247  	MOVL -4(AX)(BX*1), AX
13248  	MOVL SI, (CX)
13249  	MOVL AX, -4(CX)(BX*1)
13250  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
13251  
13252  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
13253  	MOVQ (AX), SI
13254  	MOVQ -8(AX)(BX*1), AX
13255  	MOVQ SI, (CX)
13256  	MOVQ AX, -8(CX)(BX*1)
13257  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
13258  
13259  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
13260  	MOVOU (AX), X0
13261  	MOVOU -16(AX)(BX*1), X1
13262  	MOVOU X0, (CX)
13263  	MOVOU X1, -16(CX)(BX*1)
13264  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
13265  
13266  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
13267  	MOVOU (AX), X0
13268  	MOVOU 16(AX), X1
13269  	MOVOU -32(AX)(BX*1), X2
13270  	MOVOU -16(AX)(BX*1), X3
13271  	MOVOU X0, (CX)
13272  	MOVOU X1, 16(CX)
13273  	MOVOU X2, -32(CX)(BX*1)
13274  	MOVOU X3, -16(CX)(BX*1)
13275  
13276  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
13277  	MOVQ DX, CX
13278  	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
13279  
13280  memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
13281  	LEAQ (CX)(SI*1), DX
13282  	MOVL SI, BX
13283  
13284  	// genMemMoveLong
13285  	MOVOU (AX), X0
13286  	MOVOU 16(AX), X1
13287  	MOVOU -32(AX)(BX*1), X2
13288  	MOVOU -16(AX)(BX*1), X3
13289  	MOVQ  BX, DI
13290  	SHRQ  $0x05, DI
13291  	MOVQ  CX, SI
13292  	ANDL  $0x0000001f, SI
13293  	MOVQ  $0x00000040, R8
13294  	SUBQ  SI, R8
13295  	DECQ  DI
13296  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
13297  	LEAQ  -32(AX)(R8*1), SI
13298  	LEAQ  -32(CX)(R8*1), R9
13299  
13300  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
13301  	MOVOU (SI), X4
13302  	MOVOU 16(SI), X5
13303  	MOVOA X4, (R9)
13304  	MOVOA X5, 16(R9)
13305  	ADDQ  $0x20, R9
13306  	ADDQ  $0x20, SI
13307  	ADDQ  $0x20, R8
13308  	DECQ  DI
13309  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
13310  
13311  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
13312  	MOVOU -32(AX)(R8*1), X4
13313  	MOVOU -16(AX)(R8*1), X5
13314  	MOVOA X4, -32(CX)(R8*1)
13315  	MOVOA X5, -16(CX)(R8*1)
13316  	ADDQ  $0x20, R8
13317  	CMPQ  BX, R8
13318  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
13319  	MOVOU X0, (CX)
13320  	MOVOU X1, 16(CX)
13321  	MOVOU X2, -32(CX)(BX*1)
13322  	MOVOU X3, -16(CX)(BX*1)
13323  	MOVQ  DX, CX
13324  
13325  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
13326  	MOVQ dst_base+0(FP), AX
13327  	SUBQ AX, CX
13328  	MOVQ CX, ret+56(FP)
13329  	RET
13330  
13331  // func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
13332  // Requires: BMI, SSE2
13333  TEXT ·encodeSnappyBlockAsm10B(SB), $24-64
13334  	MOVQ tmp+48(FP), AX
13335  	MOVQ dst_base+0(FP), CX
13336  	MOVQ $0x00000020, DX
13337  	MOVQ AX, BX
13338  	PXOR X0, X0
13339  
13340  zero_loop_encodeSnappyBlockAsm10B:
13341  	MOVOU X0, (BX)
13342  	MOVOU X0, 16(BX)
13343  	MOVOU X0, 32(BX)
13344  	MOVOU X0, 48(BX)
13345  	MOVOU X0, 64(BX)
13346  	MOVOU X0, 80(BX)
13347  	MOVOU X0, 96(BX)
13348  	MOVOU X0, 112(BX)
13349  	ADDQ  $0x80, BX
13350  	DECQ  DX
13351  	JNZ   zero_loop_encodeSnappyBlockAsm10B
13352  	MOVL  $0x00000000, 12(SP)
13353  	MOVQ  src_len+32(FP), DX
13354  	LEAQ  -9(DX), BX
13355  	LEAQ  -8(DX), SI
13356  	MOVL  SI, 8(SP)
13357  	SHRQ  $0x05, DX
13358  	SUBL  DX, BX
13359  	LEAQ  (CX)(BX*1), BX
13360  	MOVQ  BX, (SP)
13361  	MOVL  $0x00000001, DX
13362  	MOVL  DX, 16(SP)
13363  	MOVQ  src_base+24(FP), BX
13364  
13365  search_loop_encodeSnappyBlockAsm10B:
13366  	MOVL  DX, SI
13367  	SUBL  12(SP), SI
13368  	SHRL  $0x05, SI
13369  	LEAL  4(DX)(SI*1), SI
13370  	CMPL  SI, 8(SP)
13371  	JAE   emit_remainder_encodeSnappyBlockAsm10B
13372  	MOVQ  (BX)(DX*1), DI
13373  	MOVL  SI, 20(SP)
13374  	MOVQ  $0x9e3779b1, R9
13375  	MOVQ  DI, R10
13376  	MOVQ  DI, R11
13377  	SHRQ  $0x08, R11
13378  	SHLQ  $0x20, R10
13379  	IMULQ R9, R10
13380  	SHRQ  $0x36, R10
13381  	SHLQ  $0x20, R11
13382  	IMULQ R9, R11
13383  	SHRQ  $0x36, R11
13384  	MOVL  (AX)(R10*4), SI
13385  	MOVL  (AX)(R11*4), R8
13386  	MOVL  DX, (AX)(R10*4)
13387  	LEAL  1(DX), R10
13388  	MOVL  R10, (AX)(R11*4)
13389  	MOVQ  DI, R10
13390  	SHRQ  $0x10, R10
13391  	SHLQ  $0x20, R10
13392  	IMULQ R9, R10
13393  	SHRQ  $0x36, R10
13394  	MOVL  DX, R9
13395  	SUBL  16(SP), R9
13396  	MOVL  1(BX)(R9*1), R11
13397  	MOVQ  DI, R9
13398  	SHRQ  $0x08, R9
13399  	CMPL  R9, R11
13400  	JNE   no_repeat_found_encodeSnappyBlockAsm10B
13401  	LEAL  1(DX), DI
13402  	MOVL  12(SP), SI
13403  	MOVL  DI, R8
13404  	SUBL  16(SP), R8
13405  	JZ    repeat_extend_back_end_encodeSnappyBlockAsm10B
13406  
13407  repeat_extend_back_loop_encodeSnappyBlockAsm10B:
13408  	CMPL DI, SI
13409  	JBE  repeat_extend_back_end_encodeSnappyBlockAsm10B
13410  	MOVB -1(BX)(R8*1), R9
13411  	MOVB -1(BX)(DI*1), R10
13412  	CMPB R9, R10
13413  	JNE  repeat_extend_back_end_encodeSnappyBlockAsm10B
13414  	LEAL -1(DI), DI
13415  	DECL R8
13416  	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm10B
13417  
13418  repeat_extend_back_end_encodeSnappyBlockAsm10B:
13419  	MOVL DI, SI
13420  	SUBL 12(SP), SI
13421  	LEAQ 3(CX)(SI*1), SI
13422  	CMPQ SI, (SP)
13423  	JB   repeat_dst_size_check_encodeSnappyBlockAsm10B
13424  	MOVQ $0x00000000, ret+56(FP)
13425  	RET
13426  
13427  repeat_dst_size_check_encodeSnappyBlockAsm10B:
13428  	MOVL 12(SP), SI
13429  	CMPL SI, DI
13430  	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
13431  	MOVL DI, R8
13432  	MOVL DI, 12(SP)
13433  	LEAQ (BX)(SI*1), R9
13434  	SUBL SI, R8
13435  	LEAL -1(R8), SI
13436  	CMPL SI, $0x3c
13437  	JB   one_byte_repeat_emit_encodeSnappyBlockAsm10B
13438  	CMPL SI, $0x00000100
13439  	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm10B
13440  	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm10B
13441  
13442  three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
13443  	MOVB $0xf4, (CX)
13444  	MOVW SI, 1(CX)
13445  	ADDQ $0x03, CX
13446  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
13447  
13448  two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
13449  	MOVB $0xf0, (CX)
13450  	MOVB SI, 1(CX)
13451  	ADDQ $0x02, CX
13452  	CMPL SI, $0x40
13453  	JB   memmove_repeat_emit_encodeSnappyBlockAsm10B
13454  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
13455  
13456  one_byte_repeat_emit_encodeSnappyBlockAsm10B:
13457  	SHLB $0x02, SI
13458  	MOVB SI, (CX)
13459  	ADDQ $0x01, CX
13460  
13461  memmove_repeat_emit_encodeSnappyBlockAsm10B:
13462  	LEAQ (CX)(R8*1), SI
13463  
13464  	// genMemMoveShort
13465  	CMPQ R8, $0x08
13466  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
13467  	CMPQ R8, $0x10
13468  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
13469  	CMPQ R8, $0x20
13470  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
13471  	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
13472  
13473  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
13474  	MOVQ (R9), R10
13475  	MOVQ R10, (CX)
13476  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
13477  
13478  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
13479  	MOVQ (R9), R10
13480  	MOVQ -8(R9)(R8*1), R9
13481  	MOVQ R10, (CX)
13482  	MOVQ R9, -8(CX)(R8*1)
13483  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
13484  
13485  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
13486  	MOVOU (R9), X0
13487  	MOVOU -16(R9)(R8*1), X1
13488  	MOVOU X0, (CX)
13489  	MOVOU X1, -16(CX)(R8*1)
13490  	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
13491  
13492  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
13493  	MOVOU (R9), X0
13494  	MOVOU 16(R9), X1
13495  	MOVOU -32(R9)(R8*1), X2
13496  	MOVOU -16(R9)(R8*1), X3
13497  	MOVOU X0, (CX)
13498  	MOVOU X1, 16(CX)
13499  	MOVOU X2, -32(CX)(R8*1)
13500  	MOVOU X3, -16(CX)(R8*1)
13501  
13502  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
13503  	MOVQ SI, CX
13504  	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
13505  
13506  memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
13507  	LEAQ (CX)(R8*1), SI
13508  
13509  	// genMemMoveLong
13510  	MOVOU (R9), X0
13511  	MOVOU 16(R9), X1
13512  	MOVOU -32(R9)(R8*1), X2
13513  	MOVOU -16(R9)(R8*1), X3
13514  	MOVQ  R8, R11
13515  	SHRQ  $0x05, R11
13516  	MOVQ  CX, R10
13517  	ANDL  $0x0000001f, R10
13518  	MOVQ  $0x00000040, R12
13519  	SUBQ  R10, R12
13520  	DECQ  R11
13521  	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
13522  	LEAQ  -32(R9)(R12*1), R10
13523  	LEAQ  -32(CX)(R12*1), R13
13524  
13525  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
13526  	MOVOU (R10), X4
13527  	MOVOU 16(R10), X5
13528  	MOVOA X4, (R13)
13529  	MOVOA X5, 16(R13)
13530  	ADDQ  $0x20, R13
13531  	ADDQ  $0x20, R10
13532  	ADDQ  $0x20, R12
13533  	DECQ  R11
13534  	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
13535  
13536  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
13537  	MOVOU -32(R9)(R12*1), X4
13538  	MOVOU -16(R9)(R12*1), X5
13539  	MOVOA X4, -32(CX)(R12*1)
13540  	MOVOA X5, -16(CX)(R12*1)
13541  	ADDQ  $0x20, R12
13542  	CMPQ  R8, R12
13543  	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
13544  	MOVOU X0, (CX)
13545  	MOVOU X1, 16(CX)
13546  	MOVOU X2, -32(CX)(R8*1)
13547  	MOVOU X3, -16(CX)(R8*1)
13548  	MOVQ  SI, CX
13549  
13550  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
13551  	ADDL $0x05, DX
13552  	MOVL DX, SI
13553  	SUBL 16(SP), SI
13554  	MOVQ src_len+32(FP), R8
13555  	SUBL DX, R8
13556  	LEAQ (BX)(DX*1), R9
13557  	LEAQ (BX)(SI*1), SI
13558  
13559  	// matchLen
13560  	XORL R11, R11
13561  
13562  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B:
13563  	CMPL R8, $0x10
13564  	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B
13565  	MOVQ (R9)(R11*1), R10
13566  	MOVQ 8(R9)(R11*1), R12
13567  	XORQ (SI)(R11*1), R10
13568  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
13569  	XORQ 8(SI)(R11*1), R12
13570  	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B
13571  	LEAL -16(R8), R8
13572  	LEAL 16(R11), R11
13573  	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B
13574  
13575  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B:
13576  #ifdef GOAMD64_v3
13577  	TZCNTQ R12, R12
13578  
13579  #else
13580  	BSFQ R12, R12
13581  
13582  #endif
13583  	SARQ $0x03, R12
13584  	LEAL 8(R11)(R12*1), R11
13585  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
13586  
13587  matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B:
13588  	CMPL R8, $0x08
13589  	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
13590  	MOVQ (R9)(R11*1), R10
13591  	XORQ (SI)(R11*1), R10
13592  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
13593  	LEAL -8(R8), R8
13594  	LEAL 8(R11), R11
13595  	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
13596  
13597  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B:
13598  #ifdef GOAMD64_v3
13599  	TZCNTQ R10, R10
13600  
13601  #else
13602  	BSFQ R10, R10
13603  
13604  #endif
13605  	SARQ $0x03, R10
13606  	LEAL (R11)(R10*1), R11
13607  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
13608  
13609  matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
13610  	CMPL R8, $0x04
13611  	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
13612  	MOVL (R9)(R11*1), R10
13613  	CMPL (SI)(R11*1), R10
13614  	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
13615  	LEAL -4(R8), R8
13616  	LEAL 4(R11), R11
13617  
13618  matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
13619  	CMPL R8, $0x01
13620  	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
13621  	JB   repeat_extend_forward_end_encodeSnappyBlockAsm10B
13622  	MOVW (R9)(R11*1), R10
13623  	CMPW (SI)(R11*1), R10
13624  	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
13625  	LEAL 2(R11), R11
13626  	SUBL $0x02, R8
13627  	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm10B
13628  
13629  matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
13630  	MOVB (R9)(R11*1), R10
13631  	CMPB (SI)(R11*1), R10
13632  	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm10B
13633  	LEAL 1(R11), R11
13634  
13635  repeat_extend_forward_end_encodeSnappyBlockAsm10B:
13636  	ADDL R11, DX
13637  	MOVL DX, SI
13638  	SUBL DI, SI
13639  	MOVL 16(SP), DI
13640  
13641  	// emitCopy
13642  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
13643  	CMPL SI, $0x40
13644  	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
13645  	MOVB $0xee, (CX)
13646  	MOVW DI, 1(CX)
13647  	LEAL -60(SI), SI
13648  	ADDQ $0x03, CX
13649  	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
13650  
13651  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
13652  	MOVL SI, R8
13653  	SHLL $0x02, R8
13654  	CMPL SI, $0x0c
13655  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
13656  	CMPL DI, $0x00000800
13657  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
13658  	LEAL -15(R8), R8
13659  	MOVB DI, 1(CX)
13660  	SHRL $0x08, DI
13661  	SHLL $0x05, DI
13662  	ORL  DI, R8
13663  	MOVB R8, (CX)
13664  	ADDQ $0x02, CX
13665  	JMP  repeat_end_emit_encodeSnappyBlockAsm10B
13666  
13667  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
13668  	LEAL -2(R8), R8
13669  	MOVB R8, (CX)
13670  	MOVW DI, 1(CX)
13671  	ADDQ $0x03, CX
13672  
13673  repeat_end_emit_encodeSnappyBlockAsm10B:
13674  	MOVL DX, 12(SP)
13675  	JMP  search_loop_encodeSnappyBlockAsm10B
13676  
13677  no_repeat_found_encodeSnappyBlockAsm10B:
13678  	CMPL (BX)(SI*1), DI
13679  	JEQ  candidate_match_encodeSnappyBlockAsm10B
13680  	SHRQ $0x08, DI
13681  	MOVL (AX)(R10*4), SI
13682  	LEAL 2(DX), R9
13683  	CMPL (BX)(R8*1), DI
13684  	JEQ  candidate2_match_encodeSnappyBlockAsm10B
13685  	MOVL R9, (AX)(R10*4)
13686  	SHRQ $0x08, DI
13687  	CMPL (BX)(SI*1), DI
13688  	JEQ  candidate3_match_encodeSnappyBlockAsm10B
13689  	MOVL 20(SP), DX
13690  	JMP  search_loop_encodeSnappyBlockAsm10B
13691  
13692  candidate3_match_encodeSnappyBlockAsm10B:
13693  	ADDL $0x02, DX
13694  	JMP  candidate_match_encodeSnappyBlockAsm10B
13695  
13696  candidate2_match_encodeSnappyBlockAsm10B:
13697  	MOVL R9, (AX)(R10*4)
13698  	INCL DX
13699  	MOVL R8, SI
13700  
13701  candidate_match_encodeSnappyBlockAsm10B:
13702  	MOVL  12(SP), DI
13703  	TESTL SI, SI
13704  	JZ    match_extend_back_end_encodeSnappyBlockAsm10B
13705  
13706  match_extend_back_loop_encodeSnappyBlockAsm10B:
13707  	CMPL DX, DI
13708  	JBE  match_extend_back_end_encodeSnappyBlockAsm10B
13709  	MOVB -1(BX)(SI*1), R8
13710  	MOVB -1(BX)(DX*1), R9
13711  	CMPB R8, R9
13712  	JNE  match_extend_back_end_encodeSnappyBlockAsm10B
13713  	LEAL -1(DX), DX
13714  	DECL SI
13715  	JZ   match_extend_back_end_encodeSnappyBlockAsm10B
13716  	JMP  match_extend_back_loop_encodeSnappyBlockAsm10B
13717  
13718  match_extend_back_end_encodeSnappyBlockAsm10B:
13719  	MOVL DX, DI
13720  	SUBL 12(SP), DI
13721  	LEAQ 3(CX)(DI*1), DI
13722  	CMPQ DI, (SP)
13723  	JB   match_dst_size_check_encodeSnappyBlockAsm10B
13724  	MOVQ $0x00000000, ret+56(FP)
13725  	RET
13726  
13727  match_dst_size_check_encodeSnappyBlockAsm10B:
13728  	MOVL DX, DI
13729  	MOVL 12(SP), R8
13730  	CMPL R8, DI
13731  	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
13732  	MOVL DI, R9
13733  	MOVL DI, 12(SP)
13734  	LEAQ (BX)(R8*1), DI
13735  	SUBL R8, R9
13736  	LEAL -1(R9), R8
13737  	CMPL R8, $0x3c
13738  	JB   one_byte_match_emit_encodeSnappyBlockAsm10B
13739  	CMPL R8, $0x00000100
13740  	JB   two_bytes_match_emit_encodeSnappyBlockAsm10B
13741  	JB   three_bytes_match_emit_encodeSnappyBlockAsm10B
13742  
13743  three_bytes_match_emit_encodeSnappyBlockAsm10B:
13744  	MOVB $0xf4, (CX)
13745  	MOVW R8, 1(CX)
13746  	ADDQ $0x03, CX
13747  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
13748  
13749  two_bytes_match_emit_encodeSnappyBlockAsm10B:
13750  	MOVB $0xf0, (CX)
13751  	MOVB R8, 1(CX)
13752  	ADDQ $0x02, CX
13753  	CMPL R8, $0x40
13754  	JB   memmove_match_emit_encodeSnappyBlockAsm10B
13755  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
13756  
13757  one_byte_match_emit_encodeSnappyBlockAsm10B:
13758  	SHLB $0x02, R8
13759  	MOVB R8, (CX)
13760  	ADDQ $0x01, CX
13761  
13762  memmove_match_emit_encodeSnappyBlockAsm10B:
13763  	LEAQ (CX)(R9*1), R8
13764  
13765  	// genMemMoveShort
13766  	CMPQ R9, $0x08
13767  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
13768  	CMPQ R9, $0x10
13769  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
13770  	CMPQ R9, $0x20
13771  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
13772  	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
13773  
13774  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
13775  	MOVQ (DI), R10
13776  	MOVQ R10, (CX)
13777  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
13778  
13779  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
13780  	MOVQ (DI), R10
13781  	MOVQ -8(DI)(R9*1), DI
13782  	MOVQ R10, (CX)
13783  	MOVQ DI, -8(CX)(R9*1)
13784  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
13785  
13786  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
13787  	MOVOU (DI), X0
13788  	MOVOU -16(DI)(R9*1), X1
13789  	MOVOU X0, (CX)
13790  	MOVOU X1, -16(CX)(R9*1)
13791  	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
13792  
13793  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
13794  	MOVOU (DI), X0
13795  	MOVOU 16(DI), X1
13796  	MOVOU -32(DI)(R9*1), X2
13797  	MOVOU -16(DI)(R9*1), X3
13798  	MOVOU X0, (CX)
13799  	MOVOU X1, 16(CX)
13800  	MOVOU X2, -32(CX)(R9*1)
13801  	MOVOU X3, -16(CX)(R9*1)
13802  
13803  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
13804  	MOVQ R8, CX
13805  	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
13806  
13807  memmove_long_match_emit_encodeSnappyBlockAsm10B:
13808  	LEAQ (CX)(R9*1), R8
13809  
13810  	// genMemMoveLong
13811  	MOVOU (DI), X0
13812  	MOVOU 16(DI), X1
13813  	MOVOU -32(DI)(R9*1), X2
13814  	MOVOU -16(DI)(R9*1), X3
13815  	MOVQ  R9, R11
13816  	SHRQ  $0x05, R11
13817  	MOVQ  CX, R10
13818  	ANDL  $0x0000001f, R10
13819  	MOVQ  $0x00000040, R12
13820  	SUBQ  R10, R12
13821  	DECQ  R11
13822  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
13823  	LEAQ  -32(DI)(R12*1), R10
13824  	LEAQ  -32(CX)(R12*1), R13
13825  
13826  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
13827  	MOVOU (R10), X4
13828  	MOVOU 16(R10), X5
13829  	MOVOA X4, (R13)
13830  	MOVOA X5, 16(R13)
13831  	ADDQ  $0x20, R13
13832  	ADDQ  $0x20, R10
13833  	ADDQ  $0x20, R12
13834  	DECQ  R11
13835  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
13836  
13837  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
13838  	MOVOU -32(DI)(R12*1), X4
13839  	MOVOU -16(DI)(R12*1), X5
13840  	MOVOA X4, -32(CX)(R12*1)
13841  	MOVOA X5, -16(CX)(R12*1)
13842  	ADDQ  $0x20, R12
13843  	CMPQ  R9, R12
13844  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
13845  	MOVOU X0, (CX)
13846  	MOVOU X1, 16(CX)
13847  	MOVOU X2, -32(CX)(R9*1)
13848  	MOVOU X3, -16(CX)(R9*1)
13849  	MOVQ  R8, CX
13850  
13851  emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
13852  match_nolit_loop_encodeSnappyBlockAsm10B:
13853  	MOVL DX, DI
13854  	SUBL SI, DI
13855  	MOVL DI, 16(SP)
13856  	ADDL $0x04, DX
13857  	ADDL $0x04, SI
13858  	MOVQ src_len+32(FP), DI
13859  	SUBL DX, DI
13860  	LEAQ (BX)(DX*1), R8
13861  	LEAQ (BX)(SI*1), SI
13862  
13863  	// matchLen
13864  	XORL R10, R10
13865  
13866  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B:
13867  	CMPL DI, $0x10
13868  	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm10B
13869  	MOVQ (R8)(R10*1), R9
13870  	MOVQ 8(R8)(R10*1), R11
13871  	XORQ (SI)(R10*1), R9
13872  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
13873  	XORQ 8(SI)(R10*1), R11
13874  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B
13875  	LEAL -16(DI), DI
13876  	LEAL 16(R10), R10
13877  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B
13878  
13879  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B:
13880  #ifdef GOAMD64_v3
13881  	TZCNTQ R11, R11
13882  
13883  #else
13884  	BSFQ R11, R11
13885  
13886  #endif
13887  	SARQ $0x03, R11
13888  	LEAL 8(R10)(R11*1), R10
13889  	JMP  match_nolit_end_encodeSnappyBlockAsm10B
13890  
13891  matchlen_match8_match_nolit_encodeSnappyBlockAsm10B:
13892  	CMPL DI, $0x08
13893  	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
13894  	MOVQ (R8)(R10*1), R9
13895  	XORQ (SI)(R10*1), R9
13896  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
13897  	LEAL -8(DI), DI
13898  	LEAL 8(R10), R10
13899  	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
13900  
13901  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B:
13902  #ifdef GOAMD64_v3
13903  	TZCNTQ R9, R9
13904  
13905  #else
13906  	BSFQ R9, R9
13907  
13908  #endif
13909  	SARQ $0x03, R9
13910  	LEAL (R10)(R9*1), R10
13911  	JMP  match_nolit_end_encodeSnappyBlockAsm10B
13912  
13913  matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
13914  	CMPL DI, $0x04
13915  	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
13916  	MOVL (R8)(R10*1), R9
13917  	CMPL (SI)(R10*1), R9
13918  	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
13919  	LEAL -4(DI), DI
13920  	LEAL 4(R10), R10
13921  
13922  matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
13923  	CMPL DI, $0x01
13924  	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
13925  	JB   match_nolit_end_encodeSnappyBlockAsm10B
13926  	MOVW (R8)(R10*1), R9
13927  	CMPW (SI)(R10*1), R9
13928  	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
13929  	LEAL 2(R10), R10
13930  	SUBL $0x02, DI
13931  	JZ   match_nolit_end_encodeSnappyBlockAsm10B
13932  
13933  matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
13934  	MOVB (R8)(R10*1), R9
13935  	CMPB (SI)(R10*1), R9
13936  	JNE  match_nolit_end_encodeSnappyBlockAsm10B
13937  	LEAL 1(R10), R10
13938  
13939  match_nolit_end_encodeSnappyBlockAsm10B:
13940  	ADDL R10, DX
13941  	MOVL 16(SP), SI
13942  	ADDL $0x04, R10
13943  	MOVL DX, 12(SP)
13944  
13945  	// emitCopy
13946  two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
13947  	CMPL R10, $0x40
13948  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
13949  	MOVB $0xee, (CX)
13950  	MOVW SI, 1(CX)
13951  	LEAL -60(R10), R10
13952  	ADDQ $0x03, CX
13953  	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
13954  
13955  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
13956  	MOVL R10, DI
13957  	SHLL $0x02, DI
13958  	CMPL R10, $0x0c
13959  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
13960  	CMPL SI, $0x00000800
13961  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
13962  	LEAL -15(DI), DI
13963  	MOVB SI, 1(CX)
13964  	SHRL $0x08, SI
13965  	SHLL $0x05, SI
13966  	ORL  SI, DI
13967  	MOVB DI, (CX)
13968  	ADDQ $0x02, CX
13969  	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
13970  
13971  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
13972  	LEAL -2(DI), DI
13973  	MOVB DI, (CX)
13974  	MOVW SI, 1(CX)
13975  	ADDQ $0x03, CX
13976  
13977  match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
13978  	CMPL DX, 8(SP)
13979  	JAE  emit_remainder_encodeSnappyBlockAsm10B
13980  	MOVQ -2(BX)(DX*1), DI
13981  	CMPQ CX, (SP)
13982  	JB   match_nolit_dst_ok_encodeSnappyBlockAsm10B
13983  	MOVQ $0x00000000, ret+56(FP)
13984  	RET
13985  
13986  match_nolit_dst_ok_encodeSnappyBlockAsm10B:
13987  	MOVQ  $0x9e3779b1, R9
13988  	MOVQ  DI, R8
13989  	SHRQ  $0x10, DI
13990  	MOVQ  DI, SI
13991  	SHLQ  $0x20, R8
13992  	IMULQ R9, R8
13993  	SHRQ  $0x36, R8
13994  	SHLQ  $0x20, SI
13995  	IMULQ R9, SI
13996  	SHRQ  $0x36, SI
13997  	LEAL  -2(DX), R9
13998  	LEAQ  (AX)(SI*4), R10
13999  	MOVL  (R10), SI
14000  	MOVL  R9, (AX)(R8*4)
14001  	MOVL  DX, (R10)
14002  	CMPL  (BX)(SI*1), DI
14003  	JEQ   match_nolit_loop_encodeSnappyBlockAsm10B
14004  	INCL  DX
14005  	JMP   search_loop_encodeSnappyBlockAsm10B
14006  
14007  emit_remainder_encodeSnappyBlockAsm10B:
14008  	MOVQ src_len+32(FP), AX
14009  	SUBL 12(SP), AX
14010  	LEAQ 3(CX)(AX*1), AX
14011  	CMPQ AX, (SP)
14012  	JB   emit_remainder_ok_encodeSnappyBlockAsm10B
14013  	MOVQ $0x00000000, ret+56(FP)
14014  	RET
14015  
14016  emit_remainder_ok_encodeSnappyBlockAsm10B:
14017  	MOVQ src_len+32(FP), AX
14018  	MOVL 12(SP), DX
14019  	CMPL DX, AX
14020  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
14021  	MOVL AX, SI
14022  	MOVL AX, 12(SP)
14023  	LEAQ (BX)(DX*1), AX
14024  	SUBL DX, SI
14025  	LEAL -1(SI), DX
14026  	CMPL DX, $0x3c
14027  	JB   one_byte_emit_remainder_encodeSnappyBlockAsm10B
14028  	CMPL DX, $0x00000100
14029  	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm10B
14030  	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm10B
14031  
14032  three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
14033  	MOVB $0xf4, (CX)
14034  	MOVW DX, 1(CX)
14035  	ADDQ $0x03, CX
14036  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
14037  
14038  two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
14039  	MOVB $0xf0, (CX)
14040  	MOVB DL, 1(CX)
14041  	ADDQ $0x02, CX
14042  	CMPL DX, $0x40
14043  	JB   memmove_emit_remainder_encodeSnappyBlockAsm10B
14044  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
14045  
14046  one_byte_emit_remainder_encodeSnappyBlockAsm10B:
14047  	SHLB $0x02, DL
14048  	MOVB DL, (CX)
14049  	ADDQ $0x01, CX
14050  
14051  memmove_emit_remainder_encodeSnappyBlockAsm10B:
14052  	LEAQ (CX)(SI*1), DX
14053  	MOVL SI, BX
14054  
14055  	// genMemMoveShort
14056  	CMPQ BX, $0x03
14057  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
14058  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
14059  	CMPQ BX, $0x08
14060  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
14061  	CMPQ BX, $0x10
14062  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
14063  	CMPQ BX, $0x20
14064  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
14065  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
14066  
14067  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
14068  	MOVB (AX), SI
14069  	MOVB -1(AX)(BX*1), AL
14070  	MOVB SI, (CX)
14071  	MOVB AL, -1(CX)(BX*1)
14072  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
14073  
14074  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
14075  	MOVW (AX), SI
14076  	MOVB 2(AX), AL
14077  	MOVW SI, (CX)
14078  	MOVB AL, 2(CX)
14079  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
14080  
14081  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
14082  	MOVL (AX), SI
14083  	MOVL -4(AX)(BX*1), AX
14084  	MOVL SI, (CX)
14085  	MOVL AX, -4(CX)(BX*1)
14086  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
14087  
14088  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
14089  	MOVQ (AX), SI
14090  	MOVQ -8(AX)(BX*1), AX
14091  	MOVQ SI, (CX)
14092  	MOVQ AX, -8(CX)(BX*1)
14093  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
14094  
14095  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
14096  	MOVOU (AX), X0
14097  	MOVOU -16(AX)(BX*1), X1
14098  	MOVOU X0, (CX)
14099  	MOVOU X1, -16(CX)(BX*1)
14100  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
14101  
14102  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
14103  	MOVOU (AX), X0
14104  	MOVOU 16(AX), X1
14105  	MOVOU -32(AX)(BX*1), X2
14106  	MOVOU -16(AX)(BX*1), X3
14107  	MOVOU X0, (CX)
14108  	MOVOU X1, 16(CX)
14109  	MOVOU X2, -32(CX)(BX*1)
14110  	MOVOU X3, -16(CX)(BX*1)
14111  
14112  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
14113  	MOVQ DX, CX
14114  	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
14115  
14116  memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
14117  	LEAQ (CX)(SI*1), DX
14118  	MOVL SI, BX
14119  
14120  	// genMemMoveLong
14121  	MOVOU (AX), X0
14122  	MOVOU 16(AX), X1
14123  	MOVOU -32(AX)(BX*1), X2
14124  	MOVOU -16(AX)(BX*1), X3
14125  	MOVQ  BX, DI
14126  	SHRQ  $0x05, DI
14127  	MOVQ  CX, SI
14128  	ANDL  $0x0000001f, SI
14129  	MOVQ  $0x00000040, R8
14130  	SUBQ  SI, R8
14131  	DECQ  DI
14132  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
14133  	LEAQ  -32(AX)(R8*1), SI
14134  	LEAQ  -32(CX)(R8*1), R9
14135  
14136  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
14137  	MOVOU (SI), X4
14138  	MOVOU 16(SI), X5
14139  	MOVOA X4, (R9)
14140  	MOVOA X5, 16(R9)
14141  	ADDQ  $0x20, R9
14142  	ADDQ  $0x20, SI
14143  	ADDQ  $0x20, R8
14144  	DECQ  DI
14145  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
14146  
14147  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
14148  	MOVOU -32(AX)(R8*1), X4
14149  	MOVOU -16(AX)(R8*1), X5
14150  	MOVOA X4, -32(CX)(R8*1)
14151  	MOVOA X5, -16(CX)(R8*1)
14152  	ADDQ  $0x20, R8
14153  	CMPQ  BX, R8
14154  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
14155  	MOVOU X0, (CX)
14156  	MOVOU X1, 16(CX)
14157  	MOVOU X2, -32(CX)(BX*1)
14158  	MOVOU X3, -16(CX)(BX*1)
14159  	MOVQ  DX, CX
14160  
14161  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
14162  	MOVQ dst_base+0(FP), AX
14163  	SUBQ AX, CX
14164  	MOVQ CX, ret+56(FP)
14165  	RET
14166  
14167  // func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
14168  // Requires: BMI, SSE2
       //
       // Encodes src into dst as a sequence of literal and copy elements and returns
       // the number of bytes written (dst cursor minus dst_base). Returns 0 from any
       // of the bounds checks below when the output would reach the dst limit.
       //
       // tmp is zeroed on entry and then used as a table of 256 32-bit source
       // positions, indexed by an 8-bit multiplicative hash of 4 source bytes.
       //
       // Register / stack roles as used in this function:
       //   AX     = tmp (hash table base); overwritten as scratch in emit_remainder
       //   BX     = src base pointer after setup; reused as a length in emit_remainder
       //   CX     = dst write cursor
       //   DX     = s, the current position in src
       //   (SP)   = dst limit pointer: dst_base + src_len - 9 - (src_len >> 5)
       //   8(SP)  = src_len - 8, upper bound for the match search
       //   12(SP) = nextEmit, start of the literal bytes not yet written to dst
       //   16(SP) = offset of the most recent match (the "repeat" offset), initially 1
       //   20(SP) = position to resume searching at when nothing matches at s
       //
       // NOTE(review): the MOVQ loads at (BX)(DX*1) read 8 bytes at a time; presumably
       // the caller guarantees src is long enough for the first iteration - confirm.
14169  TEXT ·encodeSnappyBlockAsm8B(SB), $24-64
14170  	MOVQ tmp+48(FP), AX // AX = hash table base
14171  	MOVQ dst_base+0(FP), CX // CX = dst write cursor
14172  	MOVQ $0x00000008, DX // 8 iterations x 128 bytes = the 1024-byte table
14173  	MOVQ AX, BX
14174  	PXOR X0, X0
14175  
14176  zero_loop_encodeSnappyBlockAsm8B:
14177  	MOVOU X0, (BX)
14178  	MOVOU X0, 16(BX)
14179  	MOVOU X0, 32(BX)
14180  	MOVOU X0, 48(BX)
14181  	MOVOU X0, 64(BX)
14182  	MOVOU X0, 80(BX)
14183  	MOVOU X0, 96(BX)
14184  	MOVOU X0, 112(BX)
14185  	ADDQ  $0x80, BX
14186  	DECQ  DX
14187  	JNZ   zero_loop_encodeSnappyBlockAsm8B
14188  	MOVL  $0x00000000, 12(SP) // nextEmit = 0
14189  	MOVQ  src_len+32(FP), DX
14190  	LEAQ  -9(DX), BX
14191  	LEAQ  -8(DX), SI
14192  	MOVL  SI, 8(SP) // search limit = src_len - 8
14193  	SHRQ  $0x05, DX
14194  	SUBL  DX, BX // BX = src_len - 9 - (src_len >> 5)
14195  	LEAQ  (CX)(BX*1), BX
14196  	MOVQ  BX, (SP) // dst limit pointer used by every bounds check below
14197  	MOVL  $0x00000001, DX // s = 1
14198  	MOVL  DX, 16(SP) // repeat offset = 1
14199  	MOVQ  src_base+24(FP), BX // BX = src base from here on
14200  
       // Main search loop: hash the bytes at s, s+1 and s+2, update the table and
       // look for a repeat match or a table candidate.
14201  search_loop_encodeSnappyBlockAsm8B:
14202  	MOVL  DX, SI
14203  	SUBL  12(SP), SI
14204  	SHRL  $0x04, SI
14205  	LEAL  4(DX)(SI*1), SI // next s = s + 4 + ((s - nextEmit) >> 4): skip grows with the unmatched run
14206  	CMPL  SI, 8(SP)
14207  	JAE   emit_remainder_encodeSnappyBlockAsm8B // next s at or past src_len - 8: stop searching
14208  	MOVQ  (BX)(DX*1), DI // DI = 8 source bytes starting at s
14209  	MOVL  SI, 20(SP)
14210  	MOVQ  $0x9e3779b1, R9 // hash multiplier
14211  	MOVQ  DI, R10
14212  	MOVQ  DI, R11
14213  	SHRQ  $0x08, R11
14214  	SHLQ  $0x20, R10 // keep only the low 4 bytes (moved to the upper half)
14215  	IMULQ R9, R10
14216  	SHRQ  $0x38, R10 // R10 = 8-bit hash of the 4 bytes at s
14217  	SHLQ  $0x20, R11
14218  	IMULQ R9, R11
14219  	SHRQ  $0x38, R11 // R11 = 8-bit hash of the 4 bytes at s+1
14220  	MOVL  (AX)(R10*4), SI // SI = candidate position for s
14221  	MOVL  (AX)(R11*4), R8 // R8 = candidate position for s+1
14222  	MOVL  DX, (AX)(R10*4) // table[hash(s)] = s
14223  	LEAL  1(DX), R10
14224  	MOVL  R10, (AX)(R11*4) // table[hash(s+1)] = s+1
14225  	MOVQ  DI, R10
14226  	SHRQ  $0x10, R10
14227  	SHLQ  $0x20, R10
14228  	IMULQ R9, R10
14229  	SHRQ  $0x38, R10 // R10 = 8-bit hash of the 4 bytes at s+2 (used in no_repeat_found)
14230  	MOVL  DX, R9
14231  	SUBL  16(SP), R9
14232  	MOVL  1(BX)(R9*1), R11 // 4 bytes at s + 1 - repeat
14233  	MOVQ  DI, R9
14234  	SHRQ  $0x08, R9 // low 32 bits = the 4 bytes at s+1
14235  	CMPL  R9, R11
14236  	JNE   no_repeat_found_encodeSnappyBlockAsm8B
       	// Repeat match at s+1: DI = match start, R8 = matching earlier position.
14237  	LEAL  1(DX), DI
14238  	MOVL  12(SP), SI
14239  	MOVL  DI, R8
14240  	SUBL  16(SP), R8
14241  	JZ    repeat_extend_back_end_encodeSnappyBlockAsm8B // earlier position is 0: nothing before it to compare
14242  
       // Extend the repeat match backwards while DI > nextEmit and the preceding bytes agree.
14243  repeat_extend_back_loop_encodeSnappyBlockAsm8B:
14244  	CMPL DI, SI
14245  	JBE  repeat_extend_back_end_encodeSnappyBlockAsm8B
14246  	MOVB -1(BX)(R8*1), R9
14247  	MOVB -1(BX)(DI*1), R10
14248  	CMPB R9, R10
14249  	JNE  repeat_extend_back_end_encodeSnappyBlockAsm8B
14250  	LEAL -1(DI), DI
14251  	DECL R8
14252  	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm8B
14253  
14254  repeat_extend_back_end_encodeSnappyBlockAsm8B:
14255  	MOVL DI, SI
14256  	SUBL 12(SP), SI
14257  	LEAQ 3(CX)(SI*1), SI // dst cursor + pending literal length + 3
14258  	CMPQ SI, (SP)
14259  	JB   repeat_dst_size_check_encodeSnappyBlockAsm8B
14260  	MOVQ $0x00000000, ret+56(FP) // return 0: output would reach the dst limit
14261  	RET
14262  
       // Emit the pending literals src[nextEmit:DI] (if any) ahead of the repeat match.
14263  repeat_dst_size_check_encodeSnappyBlockAsm8B:
14264  	MOVL 12(SP), SI
14265  	CMPL SI, DI
14266  	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B // no pending literals
14267  	MOVL DI, R8
14268  	MOVL DI, 12(SP) // nextEmit = DI
14269  	LEAQ (BX)(SI*1), R9 // R9 = literal source pointer
14270  	SUBL SI, R8 // R8 = literal length
14271  	LEAL -1(R8), SI // SI = length - 1, the value stored in the literal header
14272  	CMPL SI, $0x3c
14273  	JB   one_byte_repeat_emit_encodeSnappyBlockAsm8B
14274  	CMPL SI, $0x00000100
14275  	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm8B
14276  	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm8B // NOTE(review): same flags as the JB above, so never taken; control falls through to the same label
14277  
14278  three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
14279  	MOVB $0xf4, (CX) // tag 0xf4 (61<<2) followed by a 16-bit length-1
14280  	MOVW SI, 1(CX)
14281  	ADDQ $0x03, CX
14282  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
14283  
14284  two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
14285  	MOVB $0xf0, (CX) // tag 0xf0 (60<<2) followed by an 8-bit length-1
14286  	MOVB SI, 1(CX)
14287  	ADDQ $0x02, CX
14288  	CMPL SI, $0x40
14289  	JB   memmove_repeat_emit_encodeSnappyBlockAsm8B // length <= 64: short copy
14290  	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
14291  
14292  one_byte_repeat_emit_encodeSnappyBlockAsm8B:
14293  	SHLB $0x02, SI // single tag byte: (length-1) << 2
14294  	MOVB SI, (CX)
14295  	ADDQ $0x01, CX
14296  
14297  memmove_repeat_emit_encodeSnappyBlockAsm8B:
14298  	LEAQ (CX)(R8*1), SI // SI = dst cursor after the literal bytes
14299  
14300  	// genMemMoveShort
14301  	CMPQ R8, $0x08
14302  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
14303  	CMPQ R8, $0x10
14304  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
14305  	CMPQ R8, $0x20
14306  	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
14307  	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
14308  
       // Lengths up to 8: a full 8 bytes are loaded and stored regardless of length;
       // CX is set to SI afterwards, so only the first R8 bytes count as output.
14309  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
14310  	MOVQ (R9), R10
14311  	MOVQ R10, (CX)
14312  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
14313  
       // Larger sizes copy the first and the last chunk; the two may overlap.
14314  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
14315  	MOVQ (R9), R10
14316  	MOVQ -8(R9)(R8*1), R9
14317  	MOVQ R10, (CX)
14318  	MOVQ R9, -8(CX)(R8*1)
14319  	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
14320  
14321  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
14322  	MOVOU (R9), X0
14323  	MOVOU -16(R9)(R8*1), X1
14324  	MOVOU X0, (CX)
14325  	MOVOU X1, -16(CX)(R8*1)
14326  	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
14327  
14328  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
14329  	MOVOU (R9), X0
14330  	MOVOU 16(R9), X1
14331  	MOVOU -32(R9)(R8*1), X2
14332  	MOVOU -16(R9)(R8*1), X3
14333  	MOVOU X0, (CX)
14334  	MOVOU X1, 16(CX)
14335  	MOVOU X2, -32(CX)(R8*1)
14336  	MOVOU X3, -16(CX)(R8*1)
14337  
14338  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
14339  	MOVQ SI, CX // advance the dst cursor past the literal
14340  	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
14341  
14342  memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
14343  	LEAQ (CX)(R8*1), SI // SI = dst cursor after the literal bytes
14344  
14345  	// genMemMoveLong
       	// X0..X3 hold the first and last 32 source bytes; they are stored last, after
       	// the 32-byte block loop whose MOVOA stores use a 32-byte-aligned dst address.
14346  	MOVOU (R9), X0
14347  	MOVOU 16(R9), X1
14348  	MOVOU -32(R9)(R8*1), X2
14349  	MOVOU -16(R9)(R8*1), X3
14350  	MOVQ  R8, R11
14351  	SHRQ  $0x05, R11 // R11 = length / 32
14352  	MOVQ  CX, R10
14353  	ANDL  $0x0000001f, R10
14354  	MOVQ  $0x00000040, R12
14355  	SUBQ  R10, R12 // R12 = 64 - (dst & 31), so dst + R12 - 32 is 32-byte aligned
14356  	DECQ  R11
14357  	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
14358  	LEAQ  -32(R9)(R12*1), R10
14359  	LEAQ  -32(CX)(R12*1), R13
14360  
14361  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
14362  	MOVOU (R10), X4
14363  	MOVOU 16(R10), X5
14364  	MOVOA X4, (R13)
14365  	MOVOA X5, 16(R13)
14366  	ADDQ  $0x20, R13
14367  	ADDQ  $0x20, R10
14368  	ADDQ  $0x20, R12
14369  	DECQ  R11
14370  	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
14371  
14372  emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
14373  	MOVOU -32(R9)(R12*1), X4
14374  	MOVOU -16(R9)(R12*1), X5
14375  	MOVOA X4, -32(CX)(R12*1)
14376  	MOVOA X5, -16(CX)(R12*1)
14377  	ADDQ  $0x20, R12
14378  	CMPQ  R8, R12
14379  	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 // loop while length >= R12
14380  	MOVOU X0, (CX) // write the saved head and tail over the unaligned ends
14381  	MOVOU X1, 16(CX)
14382  	MOVOU X2, -32(CX)(R8*1)
14383  	MOVOU X3, -16(CX)(R8*1)
14384  	MOVQ  SI, CX
14385  
       // Literals are out; now measure how far the repeat match extends forward.
14386  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
14387  	ADDL $0x05, DX // s += 5: the match starts at s+1 and its first 4 bytes were already compared
14388  	MOVL DX, SI
14389  	SUBL 16(SP), SI // SI = s - repeat offset
14390  	MOVQ src_len+32(FP), R8
14391  	SUBL DX, R8 // R8 = bytes remaining in src from s
14392  	LEAQ (BX)(DX*1), R9 // R9 = pointer to src[s]
14393  	LEAQ (BX)(SI*1), SI // SI = pointer to the earlier copy
14394  
14395  	// matchLen
       	// R11 counts the matching bytes; compares 16, 8, 4, 2, then 1 byte at a time.
14396  	XORL R11, R11
14397  
14398  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B:
14399  	CMPL R8, $0x10
14400  	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B
14401  	MOVQ (R9)(R11*1), R10
14402  	MOVQ 8(R9)(R11*1), R12
14403  	XORQ (SI)(R11*1), R10 // non-zero result = a difference in the first 8 bytes
14404  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
14405  	XORQ 8(SI)(R11*1), R12
14406  	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B
14407  	LEAL -16(R8), R8
14408  	LEAL 16(R11), R11
14409  	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B
14410  
14411  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B:
14412  #ifdef GOAMD64_v3
14413  	TZCNTQ R12, R12
14414  
14415  #else
14416  	BSFQ R12, R12
14417  
14418  #endif
14419  	SARQ $0x03, R12 // lowest differing bit index / 8 = matching bytes in the second quadword
14420  	LEAL 8(R11)(R12*1), R11
14421  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
14422  
14423  matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B:
14424  	CMPL R8, $0x08
14425  	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
14426  	MOVQ (R9)(R11*1), R10
14427  	XORQ (SI)(R11*1), R10
14428  	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
14429  	LEAL -8(R8), R8
14430  	LEAL 8(R11), R11
14431  	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
14432  
14433  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B:
14434  #ifdef GOAMD64_v3
14435  	TZCNTQ R10, R10
14436  
14437  #else
14438  	BSFQ R10, R10
14439  
14440  #endif
14441  	SARQ $0x03, R10 // lowest differing bit index / 8 = matching bytes in this quadword
14442  	LEAL (R11)(R10*1), R11
14443  	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
14444  
14445  matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
14446  	CMPL R8, $0x04
14447  	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
14448  	MOVL (R9)(R11*1), R10
14449  	CMPL (SI)(R11*1), R10
14450  	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
14451  	LEAL -4(R8), R8
14452  	LEAL 4(R11), R11
14453  
14454  matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
14455  	CMPL R8, $0x01
14456  	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B // exactly one byte left
14457  	JB   repeat_extend_forward_end_encodeSnappyBlockAsm8B // nothing left
14458  	MOVW (R9)(R11*1), R10
14459  	CMPW (SI)(R11*1), R10
14460  	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
14461  	LEAL 2(R11), R11
14462  	SUBL $0x02, R8
14463  	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm8B
14464  
14465  matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
14466  	MOVB (R9)(R11*1), R10
14467  	CMPB (SI)(R11*1), R10
14468  	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm8B
14469  	LEAL 1(R11), R11
14470  
14471  repeat_extend_forward_end_encodeSnappyBlockAsm8B:
14472  	ADDL R11, DX // s = end of the match
14473  	MOVL DX, SI
14474  	SUBL DI, SI // SI = match length (end - start, including any backward extension)
14475  	MOVL 16(SP), DI // DI = copy offset
14476  
14477  	// emitCopy
       	// While length > 64: emit tag 0xee (59<<2 | 2, i.e. length 60) + 16-bit offset.
14478  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
14479  	CMPL SI, $0x40
14480  	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
14481  	MOVB $0xee, (CX)
14482  	MOVW DI, 1(CX)
14483  	LEAL -60(SI), SI
14484  	ADDQ $0x03, CX
14485  	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
14486  
14487  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
14488  	MOVL SI, R8
14489  	SHLL $0x02, R8
14490  	CMPL SI, $0x0c
14491  	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B // length >= 12 needs the 3-byte form
       	// NOTE(review): the 10B variant also tests offset >= 0x800 before using this
       	// 2-byte form; presumably offsets here always fit in 11 bits - confirm.
14492  	LEAL -15(R8), R8 // length*4 - 15 = (length-4)<<2 | 1
14493  	MOVB DI, 1(CX) // low 8 bits of the offset
14494  	SHRL $0x08, DI
14495  	SHLL $0x05, DI
14496  	ORL  DI, R8 // high offset bits go into bits 5+ of the tag
14497  	MOVB R8, (CX)
14498  	ADDQ $0x02, CX
14499  	JMP  repeat_end_emit_encodeSnappyBlockAsm8B
14500  
14501  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
14502  	LEAL -2(R8), R8 // length*4 - 2 = (length-1)<<2 | 2
14503  	MOVB R8, (CX)
14504  	MOVW DI, 1(CX) // 16-bit offset
14505  	ADDQ $0x03, CX
14506  
14507  repeat_end_emit_encodeSnappyBlockAsm8B:
14508  	MOVL DX, 12(SP) // nextEmit = s
14509  	JMP  search_loop_encodeSnappyBlockAsm8B
14510  
       // No repeat: test the table candidates for s, s+1 and s+2 in turn.
       // On entry SI/R8 are the candidates for s/s+1, R10 is hash(s+2), DI the bytes at s.
14511  no_repeat_found_encodeSnappyBlockAsm8B:
14512  	CMPL (BX)(SI*1), DI
14513  	JEQ  candidate_match_encodeSnappyBlockAsm8B
14514  	SHRQ $0x08, DI // DI now starts at the bytes of s+1
14515  	MOVL (AX)(R10*4), SI // SI = candidate position for s+2
14516  	LEAL 2(DX), R9
14517  	CMPL (BX)(R8*1), DI
14518  	JEQ  candidate2_match_encodeSnappyBlockAsm8B
14519  	MOVL R9, (AX)(R10*4) // table[hash(s+2)] = s+2
14520  	SHRQ $0x08, DI // DI now starts at the bytes of s+2
14521  	CMPL (BX)(SI*1), DI
14522  	JEQ  candidate3_match_encodeSnappyBlockAsm8B
14523  	MOVL 20(SP), DX // nothing matched: jump ahead to the precomputed next s
14524  	JMP  search_loop_encodeSnappyBlockAsm8B
14525  
14526  candidate3_match_encodeSnappyBlockAsm8B:
14527  	ADDL $0x02, DX // match is at s+2
14528  	JMP  candidate_match_encodeSnappyBlockAsm8B
14529  
14530  candidate2_match_encodeSnappyBlockAsm8B:
14531  	MOVL R9, (AX)(R10*4) // table[hash(s+2)] = s+2
14532  	INCL DX // match is at s+1
14533  	MOVL R8, SI
14534  
       // DX = match position, SI = candidate position. Extend both backwards while
       // DX > nextEmit, SI > 0 and the preceding bytes agree.
14535  candidate_match_encodeSnappyBlockAsm8B:
14536  	MOVL  12(SP), DI
14537  	TESTL SI, SI
14538  	JZ    match_extend_back_end_encodeSnappyBlockAsm8B
14539  
14540  match_extend_back_loop_encodeSnappyBlockAsm8B:
14541  	CMPL DX, DI
14542  	JBE  match_extend_back_end_encodeSnappyBlockAsm8B
14543  	MOVB -1(BX)(SI*1), R8
14544  	MOVB -1(BX)(DX*1), R9
14545  	CMPB R8, R9
14546  	JNE  match_extend_back_end_encodeSnappyBlockAsm8B
14547  	LEAL -1(DX), DX
14548  	DECL SI
14549  	JZ   match_extend_back_end_encodeSnappyBlockAsm8B
14550  	JMP  match_extend_back_loop_encodeSnappyBlockAsm8B
14551  
14552  match_extend_back_end_encodeSnappyBlockAsm8B:
14553  	MOVL DX, DI
14554  	SUBL 12(SP), DI
14555  	LEAQ 3(CX)(DI*1), DI // dst cursor + pending literal length + 3
14556  	CMPQ DI, (SP)
14557  	JB   match_dst_size_check_encodeSnappyBlockAsm8B
14558  	MOVQ $0x00000000, ret+56(FP) // return 0: output would reach the dst limit
14559  	RET
14560  
       // Emit the pending literals src[nextEmit:DX] (if any) ahead of the match.
14561  match_dst_size_check_encodeSnappyBlockAsm8B:
14562  	MOVL DX, DI
14563  	MOVL 12(SP), R8
14564  	CMPL R8, DI
14565  	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm8B // no pending literals
14566  	MOVL DI, R9
14567  	MOVL DI, 12(SP) // nextEmit = match start
14568  	LEAQ (BX)(R8*1), DI // DI = literal source pointer
14569  	SUBL R8, R9 // R9 = literal length
14570  	LEAL -1(R9), R8 // R8 = length - 1, the value stored in the literal header
14571  	CMPL R8, $0x3c
14572  	JB   one_byte_match_emit_encodeSnappyBlockAsm8B
14573  	CMPL R8, $0x00000100
14574  	JB   two_bytes_match_emit_encodeSnappyBlockAsm8B
14575  	JB   three_bytes_match_emit_encodeSnappyBlockAsm8B // NOTE(review): same flags as the JB above, so never taken; control falls through to the same label
14576  
14577  three_bytes_match_emit_encodeSnappyBlockAsm8B:
14578  	MOVB $0xf4, (CX) // tag 0xf4 (61<<2) followed by a 16-bit length-1
14579  	MOVW R8, 1(CX)
14580  	ADDQ $0x03, CX
14581  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
14582  
14583  two_bytes_match_emit_encodeSnappyBlockAsm8B:
14584  	MOVB $0xf0, (CX) // tag 0xf0 (60<<2) followed by an 8-bit length-1
14585  	MOVB R8, 1(CX)
14586  	ADDQ $0x02, CX
14587  	CMPL R8, $0x40
14588  	JB   memmove_match_emit_encodeSnappyBlockAsm8B // length <= 64: short copy
14589  	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
14590  
14591  one_byte_match_emit_encodeSnappyBlockAsm8B:
14592  	SHLB $0x02, R8 // single tag byte: (length-1) << 2
14593  	MOVB R8, (CX)
14594  	ADDQ $0x01, CX
14595  
14596  memmove_match_emit_encodeSnappyBlockAsm8B:
14597  	LEAQ (CX)(R9*1), R8 // R8 = dst cursor after the literal bytes
14598  
14599  	// genMemMoveShort
14600  	CMPQ R9, $0x08
14601  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
14602  	CMPQ R9, $0x10
14603  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
14604  	CMPQ R9, $0x20
14605  	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
14606  	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
14607  
       // Lengths up to 8: a full 8 bytes are loaded and stored regardless of length;
       // CX is set to R8 afterwards, so only the first R9 bytes count as output.
14608  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
14609  	MOVQ (DI), R10
14610  	MOVQ R10, (CX)
14611  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
14612  
14613  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
14614  	MOVQ (DI), R10
14615  	MOVQ -8(DI)(R9*1), DI
14616  	MOVQ R10, (CX)
14617  	MOVQ DI, -8(CX)(R9*1)
14618  	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
14619  
14620  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
14621  	MOVOU (DI), X0
14622  	MOVOU -16(DI)(R9*1), X1
14623  	MOVOU X0, (CX)
14624  	MOVOU X1, -16(CX)(R9*1)
14625  	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
14626  
14627  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
14628  	MOVOU (DI), X0
14629  	MOVOU 16(DI), X1
14630  	MOVOU -32(DI)(R9*1), X2
14631  	MOVOU -16(DI)(R9*1), X3
14632  	MOVOU X0, (CX)
14633  	MOVOU X1, 16(CX)
14634  	MOVOU X2, -32(CX)(R9*1)
14635  	MOVOU X3, -16(CX)(R9*1)
14636  
14637  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
14638  	MOVQ R8, CX // advance the dst cursor past the literal
14639  	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
14640  
14641  memmove_long_match_emit_encodeSnappyBlockAsm8B:
14642  	LEAQ (CX)(R9*1), R8 // R8 = dst cursor after the literal bytes
14643  
14644  	// genMemMoveLong
       	// Same scheme as the long copy in the repeat path: head and tail are kept in
       	// X0..X3 and stored after the 32-byte block loop.
14645  	MOVOU (DI), X0
14646  	MOVOU 16(DI), X1
14647  	MOVOU -32(DI)(R9*1), X2
14648  	MOVOU -16(DI)(R9*1), X3
14649  	MOVQ  R9, R11
14650  	SHRQ  $0x05, R11 // R11 = length / 32
14651  	MOVQ  CX, R10
14652  	ANDL  $0x0000001f, R10
14653  	MOVQ  $0x00000040, R12
14654  	SUBQ  R10, R12 // R12 = 64 - (dst & 31), so dst + R12 - 32 is 32-byte aligned
14655  	DECQ  R11
14656  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
14657  	LEAQ  -32(DI)(R12*1), R10
14658  	LEAQ  -32(CX)(R12*1), R13
14659  
14660  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
14661  	MOVOU (R10), X4
14662  	MOVOU 16(R10), X5
14663  	MOVOA X4, (R13)
14664  	MOVOA X5, 16(R13)
14665  	ADDQ  $0x20, R13
14666  	ADDQ  $0x20, R10
14667  	ADDQ  $0x20, R12
14668  	DECQ  R11
14669  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
14670  
14671  emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
14672  	MOVOU -32(DI)(R12*1), X4
14673  	MOVOU -16(DI)(R12*1), X5
14674  	MOVOA X4, -32(CX)(R12*1)
14675  	MOVOA X5, -16(CX)(R12*1)
14676  	ADDQ  $0x20, R12
14677  	CMPQ  R9, R12
14678  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 // loop while length >= R12
14679  	MOVOU X0, (CX) // write the saved head and tail over the unaligned ends
14680  	MOVOU X1, 16(CX)
14681  	MOVOU X2, -32(CX)(R9*1)
14682  	MOVOU X3, -16(CX)(R9*1)
14683  	MOVQ  R8, CX
14684  
       // Match at DX against candidate SI with no pending literals. Also re-entered
       // from match_nolit_dst_ok when the position right after a copy matches again.
14685  emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
14686  match_nolit_loop_encodeSnappyBlockAsm8B:
14687  	MOVL DX, DI
14688  	SUBL SI, DI
14689  	MOVL DI, 16(SP) // repeat offset = s - candidate
14690  	ADDL $0x04, DX // skip the 4 bytes already known to match
14691  	ADDL $0x04, SI
14692  	MOVQ src_len+32(FP), DI
14693  	SUBL DX, DI // DI = bytes remaining in src from s
14694  	LEAQ (BX)(DX*1), R8 // R8 = pointer to src[s]
14695  	LEAQ (BX)(SI*1), SI // SI = pointer to the earlier copy
14696  
14697  	// matchLen
       	// R10 counts the additional matching bytes beyond the first 4.
14698  	XORL R10, R10
14699  
14700  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B:
14701  	CMPL DI, $0x10
14702  	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm8B
14703  	MOVQ (R8)(R10*1), R9
14704  	MOVQ 8(R8)(R10*1), R11
14705  	XORQ (SI)(R10*1), R9 // non-zero result = a difference in the first 8 bytes
14706  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
14707  	XORQ 8(SI)(R10*1), R11
14708  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B
14709  	LEAL -16(DI), DI
14710  	LEAL 16(R10), R10
14711  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B
14712  
14713  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B:
14714  #ifdef GOAMD64_v3
14715  	TZCNTQ R11, R11
14716  
14717  #else
14718  	BSFQ R11, R11
14719  
14720  #endif
14721  	SARQ $0x03, R11 // lowest differing bit index / 8 = matching bytes in the second quadword
14722  	LEAL 8(R10)(R11*1), R10
14723  	JMP  match_nolit_end_encodeSnappyBlockAsm8B
14724  
14725  matchlen_match8_match_nolit_encodeSnappyBlockAsm8B:
14726  	CMPL DI, $0x08
14727  	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
14728  	MOVQ (R8)(R10*1), R9
14729  	XORQ (SI)(R10*1), R9
14730  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
14731  	LEAL -8(DI), DI
14732  	LEAL 8(R10), R10
14733  	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
14734  
14735  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B:
14736  #ifdef GOAMD64_v3
14737  	TZCNTQ R9, R9
14738  
14739  #else
14740  	BSFQ R9, R9
14741  
14742  #endif
14743  	SARQ $0x03, R9 // lowest differing bit index / 8 = matching bytes in this quadword
14744  	LEAL (R10)(R9*1), R10
14745  	JMP  match_nolit_end_encodeSnappyBlockAsm8B
14746  
14747  matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
14748  	CMPL DI, $0x04
14749  	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
14750  	MOVL (R8)(R10*1), R9
14751  	CMPL (SI)(R10*1), R9
14752  	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
14753  	LEAL -4(DI), DI
14754  	LEAL 4(R10), R10
14755  
14756  matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
14757  	CMPL DI, $0x01
14758  	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm8B // exactly one byte left
14759  	JB   match_nolit_end_encodeSnappyBlockAsm8B // nothing left
14760  	MOVW (R8)(R10*1), R9
14761  	CMPW (SI)(R10*1), R9
14762  	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
14763  	LEAL 2(R10), R10
14764  	SUBL $0x02, DI
14765  	JZ   match_nolit_end_encodeSnappyBlockAsm8B
14766  
14767  matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
14768  	MOVB (R8)(R10*1), R9
14769  	CMPB (SI)(R10*1), R9
14770  	JNE  match_nolit_end_encodeSnappyBlockAsm8B
14771  	LEAL 1(R10), R10
14772  
14773  match_nolit_end_encodeSnappyBlockAsm8B:
14774  	ADDL R10, DX // s = end of the match
14775  	MOVL 16(SP), SI // SI = copy offset
14776  	ADDL $0x04, R10 // R10 = total match length, including the first 4 bytes
14777  	MOVL DX, 12(SP) // nextEmit = s
14778  
14779  	// emitCopy
       	// While length > 64: emit tag 0xee (59<<2 | 2, i.e. length 60) + 16-bit offset.
14780  two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
14781  	CMPL R10, $0x40
14782  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
14783  	MOVB $0xee, (CX)
14784  	MOVW SI, 1(CX)
14785  	LEAL -60(R10), R10
14786  	ADDQ $0x03, CX
14787  	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
14788  
14789  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
14790  	MOVL R10, DI
14791  	SHLL $0x02, DI
14792  	CMPL R10, $0x0c
14793  	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B // length >= 12 needs the 3-byte form
       	// NOTE(review): the 10B variant also tests offset >= 0x800 before using this
       	// 2-byte form; presumably offsets here always fit in 11 bits - confirm.
14794  	LEAL -15(DI), DI // length*4 - 15 = (length-4)<<2 | 1
14795  	MOVB SI, 1(CX) // low 8 bits of the offset
14796  	SHRL $0x08, SI
14797  	SHLL $0x05, SI
14798  	ORL  SI, DI // high offset bits go into bits 5+ of the tag
14799  	MOVB DI, (CX)
14800  	ADDQ $0x02, CX
14801  	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
14802  
14803  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
14804  	LEAL -2(DI), DI // length*4 - 2 = (length-1)<<2 | 2
14805  	MOVB DI, (CX)
14806  	MOVW SI, 1(CX) // 16-bit offset
14807  	ADDQ $0x03, CX
14808  
14809  match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
14810  	CMPL DX, 8(SP)
14811  	JAE  emit_remainder_encodeSnappyBlockAsm8B // s reached src_len - 8: stop searching
14812  	MOVQ -2(BX)(DX*1), DI // DI = 8 source bytes starting at s-2
14813  	CMPQ CX, (SP)
14814  	JB   match_nolit_dst_ok_encodeSnappyBlockAsm8B
14815  	MOVQ $0x00000000, ret+56(FP) // return 0: dst cursor reached the dst limit
14816  	RET
14817  
       // Index s-2 and s in the table, then check whether s itself starts another match.
14818  match_nolit_dst_ok_encodeSnappyBlockAsm8B:
14819  	MOVQ  $0x9e3779b1, R9 // hash multiplier
14820  	MOVQ  DI, R8
14821  	SHRQ  $0x10, DI // DI now starts at the bytes of s
14822  	MOVQ  DI, SI
14823  	SHLQ  $0x20, R8
14824  	IMULQ R9, R8
14825  	SHRQ  $0x38, R8 // R8 = 8-bit hash of the 4 bytes at s-2
14826  	SHLQ  $0x20, SI
14827  	IMULQ R9, SI
14828  	SHRQ  $0x38, SI // SI = 8-bit hash of the 4 bytes at s
14829  	LEAL  -2(DX), R9
14830  	LEAQ  (AX)(SI*4), R10
14831  	MOVL  (R10), SI // SI = candidate position for s
14832  	MOVL  R9, (AX)(R8*4) // table[hash(s-2)] = s-2
14833  	MOVL  DX, (R10) // table[hash(s)] = s
14834  	CMPL  (BX)(SI*1), DI
14835  	JEQ   match_nolit_loop_encodeSnappyBlockAsm8B // immediate match with no literals in between
14836  	INCL  DX
14837  	JMP   search_loop_encodeSnappyBlockAsm8B
14838  
       // Flush src[nextEmit:src_len] as one final literal and return the output length.
       // AX and BX are reused as scratch from here on.
14839  emit_remainder_encodeSnappyBlockAsm8B:
14840  	MOVQ src_len+32(FP), AX
14841  	SUBL 12(SP), AX
14842  	LEAQ 3(CX)(AX*1), AX // dst cursor + remaining length + 3
14843  	CMPQ AX, (SP)
14844  	JB   emit_remainder_ok_encodeSnappyBlockAsm8B
14845  	MOVQ $0x00000000, ret+56(FP) // return 0: output would reach the dst limit
14846  	RET
14847  
14848  emit_remainder_ok_encodeSnappyBlockAsm8B:
14849  	MOVQ src_len+32(FP), AX
14850  	MOVL 12(SP), DX
14851  	CMPL DX, AX
14852  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B // nothing left to emit
14853  	MOVL AX, SI
14854  	MOVL AX, 12(SP)
14855  	LEAQ (BX)(DX*1), AX // AX = literal source pointer
14856  	SUBL DX, SI // SI = literal length
14857  	LEAL -1(SI), DX // DX = length - 1, the value stored in the literal header
14858  	CMPL DX, $0x3c
14859  	JB   one_byte_emit_remainder_encodeSnappyBlockAsm8B
14860  	CMPL DX, $0x00000100
14861  	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm8B
14862  	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm8B // NOTE(review): same flags as the JB above, so never taken; control falls through to the same label
14863  
14864  three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
14865  	MOVB $0xf4, (CX) // tag 0xf4 (61<<2) followed by a 16-bit length-1
14866  	MOVW DX, 1(CX)
14867  	ADDQ $0x03, CX
14868  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
14869  
14870  two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
14871  	MOVB $0xf0, (CX) // tag 0xf0 (60<<2) followed by an 8-bit length-1
14872  	MOVB DL, 1(CX)
14873  	ADDQ $0x02, CX
14874  	CMPL DX, $0x40
14875  	JB   memmove_emit_remainder_encodeSnappyBlockAsm8B // length <= 64: short copy
14876  	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
14877  
14878  one_byte_emit_remainder_encodeSnappyBlockAsm8B:
14879  	SHLB $0x02, DL // single tag byte: (length-1) << 2
14880  	MOVB DL, (CX)
14881  	ADDQ $0x01, CX
14882  
14883  memmove_emit_remainder_encodeSnappyBlockAsm8B:
14884  	LEAQ (CX)(SI*1), DX // DX = dst cursor after the literal bytes
14885  	MOVL SI, BX // BX = length
14886  
14887  	// genMemMoveShort
       	// Unlike the two short copies above, every size class here touches exactly
       	// BX bytes (no fixed 8-byte access for lengths below 8).
14888  	CMPQ BX, $0x03
14889  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
14890  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
14891  	CMPQ BX, $0x08
14892  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7
14893  	CMPQ BX, $0x10
14894  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
14895  	CMPQ BX, $0x20
14896  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
14897  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
14898  
14899  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
14900  	MOVB (AX), SI // first byte
14901  	MOVB -1(AX)(BX*1), AL // last byte (the same byte when length is 1)
14902  	MOVB SI, (CX)
14903  	MOVB AL, -1(CX)(BX*1)
14904  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
14905  
14906  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
14907  	MOVW (AX), SI
14908  	MOVB 2(AX), AL
14909  	MOVW SI, (CX)
14910  	MOVB AL, 2(CX)
14911  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
14912  
14913  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
14914  	MOVL (AX), SI
14915  	MOVL -4(AX)(BX*1), AX
14916  	MOVL SI, (CX)
14917  	MOVL AX, -4(CX)(BX*1)
14918  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
14919  
14920  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
14921  	MOVQ (AX), SI
14922  	MOVQ -8(AX)(BX*1), AX
14923  	MOVQ SI, (CX)
14924  	MOVQ AX, -8(CX)(BX*1)
14925  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
14926  
14927  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
14928  	MOVOU (AX), X0
14929  	MOVOU -16(AX)(BX*1), X1
14930  	MOVOU X0, (CX)
14931  	MOVOU X1, -16(CX)(BX*1)
14932  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
14933  
14934  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
14935  	MOVOU (AX), X0
14936  	MOVOU 16(AX), X1
14937  	MOVOU -32(AX)(BX*1), X2
14938  	MOVOU -16(AX)(BX*1), X3
14939  	MOVOU X0, (CX)
14940  	MOVOU X1, 16(CX)
14941  	MOVOU X2, -32(CX)(BX*1)
14942  	MOVOU X3, -16(CX)(BX*1)
14943  
14944  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
14945  	MOVQ DX, CX // advance the dst cursor past the literal
14946  	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
14947  
14948  memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
14949  	LEAQ (CX)(SI*1), DX // DX = dst cursor after the literal bytes
14950  	MOVL SI, BX // BX = length
14951  
14952  	// genMemMoveLong
       	// Same scheme as the long copies above: head and tail are kept in X0..X3 and
       	// stored after the 32-byte block loop.
14953  	MOVOU (AX), X0
14954  	MOVOU 16(AX), X1
14955  	MOVOU -32(AX)(BX*1), X2
14956  	MOVOU -16(AX)(BX*1), X3
14957  	MOVQ  BX, DI
14958  	SHRQ  $0x05, DI // DI = length / 32
14959  	MOVQ  CX, SI
14960  	ANDL  $0x0000001f, SI
14961  	MOVQ  $0x00000040, R8
14962  	SUBQ  SI, R8 // R8 = 64 - (dst & 31), so dst + R8 - 32 is 32-byte aligned
14963  	DECQ  DI
14964  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
14965  	LEAQ  -32(AX)(R8*1), SI
14966  	LEAQ  -32(CX)(R8*1), R9
14967  
14968  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
14969  	MOVOU (SI), X4
14970  	MOVOU 16(SI), X5
14971  	MOVOA X4, (R9)
14972  	MOVOA X5, 16(R9)
14973  	ADDQ  $0x20, R9
14974  	ADDQ  $0x20, SI
14975  	ADDQ  $0x20, R8
14976  	DECQ  DI
14977  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
14978  
14979  emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
14980  	MOVOU -32(AX)(R8*1), X4
14981  	MOVOU -16(AX)(R8*1), X5
14982  	MOVOA X4, -32(CX)(R8*1)
14983  	MOVOA X5, -16(CX)(R8*1)
14984  	ADDQ  $0x20, R8
14985  	CMPQ  BX, R8
14986  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 // loop while length >= R8
14987  	MOVOU X0, (CX) // write the saved head and tail over the unaligned ends
14988  	MOVOU X1, 16(CX)
14989  	MOVOU X2, -32(CX)(BX*1)
14990  	MOVOU X3, -16(CX)(BX*1)
14991  	MOVQ  DX, CX
14992  
14993  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
14994  	MOVQ dst_base+0(FP), AX
14995  	SUBQ AX, CX // bytes written = dst cursor - dst_base
14996  	MOVQ CX, ret+56(FP)
14997  	RET
14998  
14999  // func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
15000  // Requires: BMI, SSE2
       //
       // Encodes src into dst as a sequence of literal and copy elements, using two
       // hash tables held in tmp to find earlier occurrences of the bytes at the
       // current position. Returns the number of bytes written to dst, or 0 when
       // the output would reach the limit computed in the prologue
       // (dst_base + src_len - 9 - src_len/32).
       //
       // tmp layout (589824 bytes, zeroed on entry):
       //   tmp[0:524288]       4-byte entries, indexed by a 17-bit hash of 7 input bytes ("long" table)
       //   tmp[524288:589824]  4-byte entries, indexed by a 14-bit hash of 4 input bytes ("short" table)
       //
       // Registers held across the main loop:
       //   AX = tmp, BX = src_base, CX = dst write pointer, DX = current src index (s)
       //
       // Stack slots:
       //   0(SP)   8 bytes: dst limit; reaching it makes the function return 0
       //   8(SP)   src_len - 8: the search stops once the position reaches this
       //   12(SP)  index of the first src byte not yet emitted
       //   16(SP)  offset of the most recent match (only written in this function)
       //   20(SP)  next search position, used when the current one yields no match
15001  TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64
15002  	MOVQ tmp+48(FP), AX // AX = tmp, base of both hash tables
15003  	MOVQ dst_base+0(FP), CX // CX = dst write pointer
15004  	MOVQ $0x00001200, DX // 0x1200 iterations * 0x80 bytes = 589824 = len(tmp)
15005  	MOVQ AX, BX
15006  	PXOR X0, X0
15007  
15008  zero_loop_encodeSnappyBetterBlockAsm:
       	// Clear tmp, 128 bytes per iteration.
15009  	MOVOU X0, (BX)
15010  	MOVOU X0, 16(BX)
15011  	MOVOU X0, 32(BX)
15012  	MOVOU X0, 48(BX)
15013  	MOVOU X0, 64(BX)
15014  	MOVOU X0, 80(BX)
15015  	MOVOU X0, 96(BX)
15016  	MOVOU X0, 112(BX)
15017  	ADDQ  $0x80, BX
15018  	DECQ  DX
15019  	JNZ   zero_loop_encodeSnappyBetterBlockAsm
15020  	MOVL  $0x00000000, 12(SP) // nothing emitted yet: pending literals start at src[0]
15021  	MOVQ  src_len+32(FP), DX
15022  	LEAQ  -9(DX), BX
15023  	LEAQ  -8(DX), SI
15024  	MOVL  SI, 8(SP) // search limit = src_len - 8
15025  	SHRQ  $0x05, DX
15026  	SUBL  DX, BX // BX = src_len - 9 - src_len/32
15027  	LEAQ  (CX)(BX*1), BX
15028  	MOVQ  BX, (SP) // dst limit = dst_base + BX
15029  	MOVL  $0x00000001, DX // searching starts at s = 1
15030  	MOVL  $0x00000000, 16(SP)
15031  	MOVQ  src_base+24(FP), BX // BX = src base for the rest of the main loop
15032  
15033  search_loop_encodeSnappyBetterBlockAsm:
       	// Next position = s + min(1 + (s - 12(SP))>>7, 100): the step grows with
       	// the amount of input that has gone unmatched, capped at 100.
15034  	MOVL DX, SI
15035  	SUBL 12(SP), SI
15036  	SHRL $0x07, SI
15037  	CMPL SI, $0x63
15038  	JBE  check_maxskip_ok_encodeSnappyBetterBlockAsm
15039  	LEAL 100(DX), SI
15040  	JMP  check_maxskip_cont_encodeSnappyBetterBlockAsm
15041  
15042  check_maxskip_ok_encodeSnappyBetterBlockAsm:
15043  	LEAL 1(DX)(SI*1), SI
15044  
15045  check_maxskip_cont_encodeSnappyBetterBlockAsm:
15046  	CMPL  SI, 8(SP)
15047  	JAE   emit_remainder_encodeSnappyBetterBlockAsm // next position is past the search limit
15048  	MOVQ  (BX)(DX*1), DI // DI = 8 bytes of src at s
15049  	MOVL  SI, 20(SP) // remember the next position
15050  	MOVQ  $0x00cf1bbcdcbfa563, R9 // multiplier for the 7-byte hash
15051  	MOVQ  $0x9e3779b1, SI // multiplier for the 4-byte hash
15052  	MOVQ  DI, R10
15053  	MOVQ  DI, R11
15054  	SHLQ  $0x08, R10 // drop the top byte: hash the low 7 bytes
15055  	IMULQ R9, R10
15056  	SHRQ  $0x2f, R10 // keep the top 17 bits as long-table index
15057  	SHLQ  $0x20, R11 // drop the top 4 bytes: hash the low 4 bytes
15058  	IMULQ SI, R11
15059  	SHRQ  $0x32, R11 // keep the top 14 bits as short-table index
15060  	MOVL  (AX)(R10*4), SI // SI = long-table candidate
15061  	MOVL  524288(AX)(R11*4), R8 // R8 = short-table candidate
15062  	MOVL  DX, (AX)(R10*4) // both tables now point at s
15063  	MOVL  DX, 524288(AX)(R11*4)
15064  	MOVQ  (BX)(SI*1), R10 // 8 bytes at the long candidate
15065  	MOVQ  (BX)(R8*1), R11 // 8 bytes at the short candidate
15066  	CMPQ  R10, DI
15067  	JEQ   candidate_match_encodeSnappyBetterBlockAsm // long candidate matches all 8 bytes
15068  	CMPQ  R11, DI
15069  	JNE   no_short_found_encodeSnappyBetterBlockAsm
15070  	MOVL  R8, SI // short candidate matches all 8 bytes: use it
15071  	JMP   candidate_match_encodeSnappyBetterBlockAsm
15072  
15073  no_short_found_encodeSnappyBetterBlockAsm:
       	// Neither candidate matched 8 bytes; fall back to 4-byte comparisons.
15074  	CMPL R10, DI
15075  	JEQ  candidate_match_encodeSnappyBetterBlockAsm
15076  	CMPL R11, DI
15077  	JEQ  candidateS_match_encodeSnappyBetterBlockAsm
15078  	MOVL 20(SP), DX // no match: advance to the saved next position
15079  	JMP  search_loop_encodeSnappyBetterBlockAsm
15080  
15081  candidateS_match_encodeSnappyBetterBlockAsm:
       	// The short candidate matched 4 bytes. Before accepting it, look up the
       	// long table for position s+1 and prefer that entry if its first 4 bytes
       	// match src[s+1:]; otherwise restore s and use the short candidate.
15082  	SHRQ  $0x08, DI // DI = bytes of src starting at s+1
15083  	MOVQ  DI, R10
15084  	SHLQ  $0x08, R10
15085  	IMULQ R9, R10
15086  	SHRQ  $0x2f, R10
15087  	MOVL  (AX)(R10*4), SI
15088  	INCL  DX
15089  	MOVL  DX, (AX)(R10*4) // long table entry now points at s+1
15090  	CMPL  (BX)(SI*1), DI
15091  	JEQ   candidate_match_encodeSnappyBetterBlockAsm
15092  	DECL  DX
15093  	MOVL  R8, SI
15094  
15095  candidate_match_encodeSnappyBetterBlockAsm:
       	// DX = s, SI = candidate. Extend the match backwards while the preceding
       	// bytes are equal, s stays above 12(SP) and the candidate stays above 0.
15096  	MOVL  12(SP), DI
15097  	TESTL SI, SI
15098  	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm
15099  
15100  match_extend_back_loop_encodeSnappyBetterBlockAsm:
15101  	CMPL DX, DI
15102  	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm
15103  	MOVB -1(BX)(SI*1), R8
15104  	MOVB -1(BX)(DX*1), R9
15105  	CMPB R8, R9
15106  	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm
15107  	LEAL -1(DX), DX
15108  	DECL SI
15109  	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm
15110  	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm
15111  
15112  match_extend_back_end_encodeSnappyBetterBlockAsm:
       	// Return 0 if CX + pending literal length + 5 reaches the dst limit.
15113  	MOVL DX, DI
15114  	SUBL 12(SP), DI
15115  	LEAQ 5(CX)(DI*1), DI
15116  	CMPQ DI, (SP)
15117  	JB   match_dst_size_check_encodeSnappyBetterBlockAsm
15118  	MOVQ $0x00000000, ret+56(FP)
15119  	RET
15120  
15121  match_dst_size_check_encodeSnappyBetterBlockAsm:
15122  	MOVL DX, DI // DI = start of the match in src
15123  	ADDL $0x04, DX // the first 4 bytes are already known to match
15124  	ADDL $0x04, SI
15125  	MOVQ src_len+32(FP), R8
15126  	SUBL DX, R8 // R8 = bytes left in src after s+4
15127  	LEAQ (BX)(DX*1), R9 // R9 = &src[s+4]
15128  	LEAQ (BX)(SI*1), R10 // R10 = &src[candidate+4]
15129  
15130  	// matchLen
       	// R12 counts equal bytes at R9/R10, comparing 16, 8, 4, 2 and 1 bytes at a
       	// time and never looking further than R8 bytes.
15131  	XORL R12, R12
15132  
15133  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm:
15134  	CMPL R8, $0x10
15135  	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm
15136  	MOVQ (R9)(R12*1), R11
15137  	MOVQ 8(R9)(R12*1), R13
15138  	XORQ (R10)(R12*1), R11 // non-zero result = a difference in the first 8 bytes
15139  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
15140  	XORQ 8(R10)(R12*1), R13 // non-zero result = a difference in the second 8 bytes
15141  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm
15142  	LEAL -16(R8), R8
15143  	LEAL 16(R12), R12
15144  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm
15145  
15146  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm:
       	// Lowest set bit of the XOR is the first differing bit; >>3 turns it into
       	// the number of equal bytes. Add 8 for the first quadword that matched.
15147  #ifdef GOAMD64_v3
15148  	TZCNTQ R13, R13
15149  
15150  #else
15151  	BSFQ R13, R13
15152  
15153  #endif
15154  	SARQ $0x03, R13
15155  	LEAL 8(R12)(R13*1), R12
15156  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
15157  
15158  matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm:
15159  	CMPL R8, $0x08
15160  	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
15161  	MOVQ (R9)(R12*1), R11
15162  	XORQ (R10)(R12*1), R11
15163  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
15164  	LEAL -8(R8), R8
15165  	LEAL 8(R12), R12
15166  	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
15167  
15168  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm:
       	// Same as above for a difference inside the current quadword.
15169  #ifdef GOAMD64_v3
15170  	TZCNTQ R11, R11
15171  
15172  #else
15173  	BSFQ R11, R11
15174  
15175  #endif
15176  	SARQ $0x03, R11
15177  	LEAL (R12)(R11*1), R12
15178  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
15179  
15180  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
15181  	CMPL R8, $0x04
15182  	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
15183  	MOVL (R9)(R12*1), R11
15184  	CMPL (R10)(R12*1), R11
15185  	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
15186  	LEAL -4(R8), R8
15187  	LEAL 4(R12), R12
15188  
15189  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
15190  	CMPL R8, $0x01
15191  	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm // exactly one byte left
15192  	JB   match_nolit_end_encodeSnappyBetterBlockAsm // nothing left
15193  	MOVW (R9)(R12*1), R11
15194  	CMPW (R10)(R12*1), R11
15195  	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
15196  	LEAL 2(R12), R12
15197  	SUBL $0x02, R8
15198  	JZ   match_nolit_end_encodeSnappyBetterBlockAsm
15199  
15200  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
15201  	MOVB (R9)(R12*1), R11
15202  	CMPB (R10)(R12*1), R11
15203  	JNE  match_nolit_end_encodeSnappyBetterBlockAsm
15204  	LEAL 1(R12), R12
15205  
15206  match_nolit_end_encodeSnappyBetterBlockAsm:
15207  	MOVL DX, R8
15208  	SUBL SI, R8 // R8 = match offset (s - candidate)
15209  
15210  	// Check if repeat
       	// The code below rejects the match when at most 1 byte matched beyond the
       	// first 4 and the offset exceeds 0xffff; the search resumes at 20(SP)+1.
15211  	CMPL R12, $0x01
15212  	JA   match_length_ok_encodeSnappyBetterBlockAsm
15213  	CMPL R8, $0x0000ffff
15214  	JBE  match_length_ok_encodeSnappyBetterBlockAsm
15215  	MOVL 20(SP), DX
15216  	INCL DX
15217  	JMP  search_loop_encodeSnappyBetterBlockAsm
15218  
15219  match_length_ok_encodeSnappyBetterBlockAsm:
       	// Emit src[12(SP):DI] as a literal (skipped when empty). The tag is chosen
       	// from length-1: <60 in the tag byte itself, otherwise 0xf0/0xf4/0xf8/0xfc
       	// followed by 1/2/3/4 length bytes.
15220  	MOVL R8, 16(SP)
15221  	MOVL 12(SP), SI
15222  	CMPL SI, DI
15223  	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
15224  	MOVL DI, R9
15225  	MOVL DI, 12(SP)
15226  	LEAQ (BX)(SI*1), R10 // R10 = literal source
15227  	SUBL SI, R9 // R9 = literal length
15228  	LEAL -1(R9), SI // SI = length - 1, the value that gets encoded
15229  	CMPL SI, $0x3c
15230  	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm
15231  	CMPL SI, $0x00000100
15232  	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm
15233  	CMPL SI, $0x00010000
15234  	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm
15235  	CMPL SI, $0x01000000
15236  	JB   four_bytes_match_emit_encodeSnappyBetterBlockAsm
15237  	MOVB $0xfc, (CX)
15238  	MOVL SI, 1(CX)
15239  	ADDQ $0x05, CX
15240  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
15241  
15242  four_bytes_match_emit_encodeSnappyBetterBlockAsm:
15243  	MOVL SI, R11
15244  	SHRL $0x10, R11
15245  	MOVB $0xf8, (CX)
15246  	MOVW SI, 1(CX)
15247  	MOVB R11, 3(CX)
15248  	ADDQ $0x04, CX
15249  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
15250  
15251  three_bytes_match_emit_encodeSnappyBetterBlockAsm:
15252  	MOVB $0xf4, (CX)
15253  	MOVW SI, 1(CX)
15254  	ADDQ $0x03, CX
15255  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
15256  
15257  two_bytes_match_emit_encodeSnappyBetterBlockAsm:
15258  	MOVB $0xf0, (CX)
15259  	MOVB SI, 1(CX)
15260  	ADDQ $0x02, CX
15261  	CMPL SI, $0x40
15262  	JB   memmove_match_emit_encodeSnappyBetterBlockAsm // length <= 64 still fits the short copy
15263  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
15264  
15265  one_byte_match_emit_encodeSnappyBetterBlockAsm:
15266  	SHLB $0x02, SI
15267  	MOVB SI, (CX)
15268  	ADDQ $0x01, CX
15269  
15270  memmove_match_emit_encodeSnappyBetterBlockAsm:
15271  	LEAQ (CX)(R9*1), SI // SI = dst pointer after the literal
15272  
15273  	// genMemMoveShort
       	// Copy R9 (<= 64) bytes from (R10) to (CX) with overlapping head/tail moves.
15274  	CMPQ R9, $0x08
15275  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
15276  	CMPQ R9, $0x10
15277  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
15278  	CMPQ R9, $0x20
15279  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
15280  	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
15281  
15282  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
       	// Always moves a full 8 bytes, even when R9 < 8; CX is set to the exact end
       	// (SI) afterwards. NOTE(review): presumably relies on the 8-byte search
       	// margin in src and the slack in the dst limit - confirm.
15283  	MOVQ (R10), R11
15284  	MOVQ R11, (CX)
15285  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
15286  
15287  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
15288  	MOVQ (R10), R11
15289  	MOVQ -8(R10)(R9*1), R10
15290  	MOVQ R11, (CX)
15291  	MOVQ R10, -8(CX)(R9*1)
15292  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
15293  
15294  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
15295  	MOVOU (R10), X0
15296  	MOVOU -16(R10)(R9*1), X1
15297  	MOVOU X0, (CX)
15298  	MOVOU X1, -16(CX)(R9*1)
15299  	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
15300  
15301  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
15302  	MOVOU (R10), X0
15303  	MOVOU 16(R10), X1
15304  	MOVOU -32(R10)(R9*1), X2
15305  	MOVOU -16(R10)(R9*1), X3
15306  	MOVOU X0, (CX)
15307  	MOVOU X1, 16(CX)
15308  	MOVOU X2, -32(CX)(R9*1)
15309  	MOVOU X3, -16(CX)(R9*1)
15310  
15311  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
15312  	MOVQ SI, CX
15313  	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
15314  
15315  memmove_long_match_emit_encodeSnappyBetterBlockAsm:
15316  	LEAQ (CX)(R9*1), SI // SI = dst pointer after the literal
15317  
15318  	// genMemMoveLong
       	// Copy R9 bytes from (R10) to (CX). The first and last 32 bytes are loaded
       	// up front and stored last with unaligned stores; the middle is copied 32
       	// bytes at a time with aligned stores, starting at offset 32 - (CX & 31).
15319  	MOVOU (R10), X0
15320  	MOVOU 16(R10), X1
15321  	MOVOU -32(R10)(R9*1), X2
15322  	MOVOU -16(R10)(R9*1), X3
15323  	MOVQ  R9, R13
15324  	SHRQ  $0x05, R13 // R13 = number of whole 32-byte chunks
15325  	MOVQ  CX, R11
15326  	ANDL  $0x0000001f, R11 // R11 = misalignment of dst within 32 bytes
15327  	MOVQ  $0x00000040, R14
15328  	SUBQ  R11, R14 // R14 = 64 - misalignment; never borrows, so CF = 0
15329  	DECQ  R13 // DECQ leaves CF alone, so JA below is taken iff the result is non-zero
15330  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
15331  	LEAQ  -32(R10)(R14*1), R11
15332  	LEAQ  -32(CX)(R14*1), R15
15333  
15334  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
15335  	MOVOU (R11), X4
15336  	MOVOU 16(R11), X5
15337  	MOVOA X4, (R15)
15338  	MOVOA X5, 16(R15)
15339  	ADDQ  $0x20, R15
15340  	ADDQ  $0x20, R11
15341  	ADDQ  $0x20, R14
15342  	DECQ  R13
15343  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
15344  
15345  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
15346  	MOVOU -32(R10)(R14*1), X4
15347  	MOVOU -16(R10)(R14*1), X5
15348  	MOVOA X4, -32(CX)(R14*1)
15349  	MOVOA X5, -16(CX)(R14*1)
15350  	ADDQ  $0x20, R14
15351  	CMPQ  R9, R14
15352  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
15353  	MOVOU X0, (CX) // head and tail stored last, covering the unaligned edges
15354  	MOVOU X1, 16(CX)
15355  	MOVOU X2, -32(CX)(R9*1)
15356  	MOVOU X3, -16(CX)(R9*1)
15357  	MOVQ  SI, CX
15358  
15359  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
15360  	ADDL R12, DX // DX = s = end of the match
15361  	ADDL $0x04, R12 // R12 = total match length
15362  	MOVL DX, 12(SP) // everything before s is now emitted
15363  
15364  	// emitCopy
       	// Encode (offset R8, length R12). Offsets >= 65536 use 5-byte elements with
       	// a 4-byte offset; smaller offsets use 3-byte elements, or a 2-byte element
       	// when the remaining length is < 12 and the offset is < 2048.
15365  	CMPL R8, $0x00010000
15366  	JB   two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
15367  
15368  four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
15369  	CMPL R12, $0x40
15370  	JBE  four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
15371  	MOVB $0xff, (CX) // tag (64-1)<<2 | 3: length 64, 4-byte offset
15372  	MOVL R8, 1(CX)
15373  	LEAL -64(R12), R12
15374  	ADDQ $0x05, CX
15375  	CMPL R12, $0x04
15376  	JB   four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
15377  	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
15378  
15379  four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
15380  	TESTL R12, R12
15381  	JZ    match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
15382  	XORL  SI, SI
15383  	LEAL  -1(SI)(R12*4), R12 // tag = length*4 - 1 = (length-1)<<2 | 3
15384  	MOVB  R12, (CX)
15385  	MOVL  R8, 1(CX)
15386  	ADDQ  $0x05, CX
15387  	JMP   match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
15388  
15389  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
15390  	CMPL R12, $0x40
15391  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
15392  	MOVB $0xee, (CX) // tag (60-1)<<2 | 2: length 60, 2-byte offset
15393  	MOVW R8, 1(CX)
15394  	LEAL -60(R12), R12
15395  	ADDQ $0x03, CX
15396  	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
15397  
15398  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
15399  	MOVL R12, SI
15400  	SHLL $0x02, SI
15401  	CMPL R12, $0x0c
15402  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
15403  	CMPL R8, $0x00000800
15404  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
15405  	LEAL -15(SI), SI // length*4 - 15 = (length-4)<<2 | 1
15406  	MOVB R8, 1(CX) // low 8 bits of the offset
15407  	SHRL $0x08, R8
15408  	SHLL $0x05, R8 // high offset bits go into bits 5-7 of the tag
15409  	ORL  R8, SI
15410  	MOVB SI, (CX)
15411  	ADDQ $0x02, CX
15412  	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
15413  
15414  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
15415  	LEAL -2(SI), SI // length*4 - 2 = (length-1)<<2 | 2
15416  	MOVB SI, (CX)
15417  	MOVW R8, 1(CX)
15418  	ADDQ $0x03, CX
15419  
15420  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
15421  	CMPL DX, 8(SP)
15422  	JAE  emit_remainder_encodeSnappyBetterBlockAsm // s reached the search limit
15423  	CMPQ CX, (SP)
15424  	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm
15425  	MOVQ $0x00000000, ret+56(FP) // dst limit reached: return 0
15426  	RET
15427  
15428  match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
       	// Index positions inside the match just emitted. DI still holds the match
       	// start and DX its end. First the two ends: long table at start+1 and
       	// end-2, short table at start+2 and end-1.
15429  	MOVQ  $0x00cf1bbcdcbfa563, SI
15430  	MOVQ  $0x9e3779b1, R8
15431  	LEAQ  1(DI), DI
15432  	LEAQ  -2(DX), R9
15433  	MOVQ  (BX)(DI*1), R10
15434  	MOVQ  1(BX)(DI*1), R11
15435  	MOVQ  (BX)(R9*1), R12
15436  	MOVQ  1(BX)(R9*1), R13
15437  	SHLQ  $0x08, R10
15438  	IMULQ SI, R10
15439  	SHRQ  $0x2f, R10
15440  	SHLQ  $0x20, R11
15441  	IMULQ R8, R11
15442  	SHRQ  $0x32, R11
15443  	SHLQ  $0x08, R12
15444  	IMULQ SI, R12
15445  	SHRQ  $0x2f, R12
15446  	SHLQ  $0x20, R13
15447  	IMULQ R8, R13
15448  	SHRQ  $0x32, R13
15449  	LEAQ  1(DI), R8
15450  	LEAQ  1(R9), R14
15451  	MOVL  DI, (AX)(R10*4)
15452  	MOVL  R9, (AX)(R12*4)
15453  	MOVL  R8, 524288(AX)(R11*4)
15454  	MOVL  R14, 524288(AX)(R13*4)
15455  	LEAQ  1(R9)(DI*1), R8
15456  	SHRQ  $0x01, R8 // R8 = midpoint between the two indexed ends
15457  	ADDQ  $0x01, DI
15458  	SUBQ  $0x01, R9
15459  
15460  index_loop_encodeSnappyBetterBlockAsm:
       	// Long-table entries for every second position, walking DI and R8 forward
       	// in parallel until R8 reaches R9; then back to the search with s = DX.
15461  	CMPQ  R8, R9
15462  	JAE   search_loop_encodeSnappyBetterBlockAsm
15463  	MOVQ  (BX)(DI*1), R10
15464  	MOVQ  (BX)(R8*1), R11
15465  	SHLQ  $0x08, R10
15466  	IMULQ SI, R10
15467  	SHRQ  $0x2f, R10
15468  	SHLQ  $0x08, R11
15469  	IMULQ SI, R11
15470  	SHRQ  $0x2f, R11
15471  	MOVL  DI, (AX)(R10*4)
15472  	MOVL  R8, (AX)(R11*4)
15473  	ADDQ  $0x02, DI
15474  	ADDQ  $0x02, R8
15475  	JMP   index_loop_encodeSnappyBetterBlockAsm
15476  
15477  emit_remainder_encodeSnappyBetterBlockAsm:
       	// Emit src[12(SP):] as one final literal. AX and BX are reused as scratch
       	// from here on. Return 0 if CX + remaining length + 5 reaches the dst limit.
15478  	MOVQ src_len+32(FP), AX
15479  	SUBL 12(SP), AX
15480  	LEAQ 5(CX)(AX*1), AX
15481  	CMPQ AX, (SP)
15482  	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm
15483  	MOVQ $0x00000000, ret+56(FP)
15484  	RET
15485  
15486  emit_remainder_ok_encodeSnappyBetterBlockAsm:
15487  	MOVQ src_len+32(FP), AX
15488  	MOVL 12(SP), DX
15489  	CMPL DX, AX
15490  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm // nothing pending
15491  	MOVL AX, SI
15492  	MOVL AX, 12(SP)
15493  	LEAQ (BX)(DX*1), AX // AX = literal source
15494  	SUBL DX, SI // SI = literal length
15495  	LEAL -1(SI), DX // DX = length - 1, the value that gets encoded
15496  	CMPL DX, $0x3c
15497  	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm
15498  	CMPL DX, $0x00000100
15499  	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
15500  	CMPL DX, $0x00010000
15501  	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
15502  	CMPL DX, $0x01000000
15503  	JB   four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
15504  	MOVB $0xfc, (CX)
15505  	MOVL DX, 1(CX)
15506  	ADDQ $0x05, CX
15507  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
15508  
15509  four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
15510  	MOVL DX, BX
15511  	SHRL $0x10, BX
15512  	MOVB $0xf8, (CX)
15513  	MOVW DX, 1(CX)
15514  	MOVB BL, 3(CX)
15515  	ADDQ $0x04, CX
15516  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
15517  
15518  three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
15519  	MOVB $0xf4, (CX)
15520  	MOVW DX, 1(CX)
15521  	ADDQ $0x03, CX
15522  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
15523  
15524  two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
15525  	MOVB $0xf0, (CX)
15526  	MOVB DL, 1(CX)
15527  	ADDQ $0x02, CX
15528  	CMPL DX, $0x40
15529  	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm // length <= 64 still fits the short copy
15530  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
15531  
15532  one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
15533  	SHLB $0x02, DL
15534  	MOVB DL, (CX)
15535  	ADDQ $0x01, CX
15536  
15537  memmove_emit_remainder_encodeSnappyBetterBlockAsm:
15538  	LEAQ (CX)(SI*1), DX // DX = dst pointer after the literal
15539  	MOVL SI, BX // BX = length
15540  
15541  	// genMemMoveShort
       	// Copy BX (<= 64) bytes from (AX) to (CX). Unlike the copy in the match
       	// path, every size class here moves exactly BX bytes.
15542  	CMPQ BX, $0x03
15543  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2
15544  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3
15545  	CMPQ BX, $0x08
15546  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7
15547  	CMPQ BX, $0x10
15548  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
15549  	CMPQ BX, $0x20
15550  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
15551  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
15552  
15553  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2:
15554  	MOVB (AX), SI
15555  	MOVB -1(AX)(BX*1), AL
15556  	MOVB SI, (CX)
15557  	MOVB AL, -1(CX)(BX*1)
15558  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
15559  
15560  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3:
15561  	MOVW (AX), SI
15562  	MOVB 2(AX), AL
15563  	MOVW SI, (CX)
15564  	MOVB AL, 2(CX)
15565  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
15566  
15567  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7:
15568  	MOVL (AX), SI
15569  	MOVL -4(AX)(BX*1), AX
15570  	MOVL SI, (CX)
15571  	MOVL AX, -4(CX)(BX*1)
15572  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
15573  
15574  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
15575  	MOVQ (AX), SI
15576  	MOVQ -8(AX)(BX*1), AX
15577  	MOVQ SI, (CX)
15578  	MOVQ AX, -8(CX)(BX*1)
15579  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
15580  
15581  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
15582  	MOVOU (AX), X0
15583  	MOVOU -16(AX)(BX*1), X1
15584  	MOVOU X0, (CX)
15585  	MOVOU X1, -16(CX)(BX*1)
15586  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
15587  
15588  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
15589  	MOVOU (AX), X0
15590  	MOVOU 16(AX), X1
15591  	MOVOU -32(AX)(BX*1), X2
15592  	MOVOU -16(AX)(BX*1), X3
15593  	MOVOU X0, (CX)
15594  	MOVOU X1, 16(CX)
15595  	MOVOU X2, -32(CX)(BX*1)
15596  	MOVOU X3, -16(CX)(BX*1)
15597  
15598  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
15599  	MOVQ DX, CX
15600  	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
15601  
15602  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
15603  	LEAQ (CX)(SI*1), DX // DX = dst pointer after the literal
15604  	MOVL SI, BX // BX = length
15605  
15606  	// genMemMoveLong
       	// Copy BX bytes from (AX) to (CX): head and tail (32 bytes each) are loaded
       	// first and stored last; the middle uses aligned 32-byte stores.
15607  	MOVOU (AX), X0
15608  	MOVOU 16(AX), X1
15609  	MOVOU -32(AX)(BX*1), X2
15610  	MOVOU -16(AX)(BX*1), X3
15611  	MOVQ  BX, DI
15612  	SHRQ  $0x05, DI // DI = number of whole 32-byte chunks
15613  	MOVQ  CX, SI
15614  	ANDL  $0x0000001f, SI // SI = misalignment of dst within 32 bytes
15615  	MOVQ  $0x00000040, R8
15616  	SUBQ  SI, R8 // R8 = 64 - misalignment; never borrows, so CF = 0
15617  	DECQ  DI // DECQ leaves CF alone, so JA below is taken iff the result is non-zero
15618  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
15619  	LEAQ  -32(AX)(R8*1), SI
15620  	LEAQ  -32(CX)(R8*1), R9
15621  
15622  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
15623  	MOVOU (SI), X4
15624  	MOVOU 16(SI), X5
15625  	MOVOA X4, (R9)
15626  	MOVOA X5, 16(R9)
15627  	ADDQ  $0x20, R9
15628  	ADDQ  $0x20, SI
15629  	ADDQ  $0x20, R8
15630  	DECQ  DI
15631  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
15632  
15633  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
15634  	MOVOU -32(AX)(R8*1), X4
15635  	MOVOU -16(AX)(R8*1), X5
15636  	MOVOA X4, -32(CX)(R8*1)
15637  	MOVOA X5, -16(CX)(R8*1)
15638  	ADDQ  $0x20, R8
15639  	CMPQ  BX, R8
15640  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
15641  	MOVOU X0, (CX) // head and tail stored last, covering the unaligned edges
15642  	MOVOU X1, 16(CX)
15643  	MOVOU X2, -32(CX)(BX*1)
15644  	MOVOU X3, -16(CX)(BX*1)
15645  	MOVQ  DX, CX
15646  
15647  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
       	// Return the number of bytes written: CX - dst_base.
15648  	MOVQ dst_base+0(FP), AX
15649  	SUBQ AX, CX
15650  	MOVQ CX, ret+56(FP)
15651  	RET
15652  
15653  // func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int
15654  // Requires: BMI, SSE2
15655  TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64
15656  	MOVQ tmp+48(FP), AX
15657  	MOVQ dst_base+0(FP), CX
15658  	MOVQ $0x00000900, DX
15659  	MOVQ AX, BX
15660  	PXOR X0, X0
15661  
15662  zero_loop_encodeSnappyBetterBlockAsm64K:
15663  	MOVOU X0, (BX)
15664  	MOVOU X0, 16(BX)
15665  	MOVOU X0, 32(BX)
15666  	MOVOU X0, 48(BX)
15667  	MOVOU X0, 64(BX)
15668  	MOVOU X0, 80(BX)
15669  	MOVOU X0, 96(BX)
15670  	MOVOU X0, 112(BX)
15671  	ADDQ  $0x80, BX
15672  	DECQ  DX
15673  	JNZ   zero_loop_encodeSnappyBetterBlockAsm64K
15674  	MOVL  $0x00000000, 12(SP)
15675  	MOVQ  src_len+32(FP), DX
15676  	LEAQ  -9(DX), BX
15677  	LEAQ  -8(DX), SI
15678  	MOVL  SI, 8(SP)
15679  	SHRQ  $0x05, DX
15680  	SUBL  DX, BX
15681  	LEAQ  (CX)(BX*1), BX
15682  	MOVQ  BX, (SP)
15683  	MOVL  $0x00000001, DX
15684  	MOVL  $0x00000000, 16(SP)
15685  	MOVQ  src_base+24(FP), BX
15686  
15687  search_loop_encodeSnappyBetterBlockAsm64K:
15688  	MOVL  DX, SI
15689  	SUBL  12(SP), SI
15690  	SHRL  $0x07, SI
15691  	LEAL  1(DX)(SI*1), SI
15692  	CMPL  SI, 8(SP)
15693  	JAE   emit_remainder_encodeSnappyBetterBlockAsm64K
15694  	MOVQ  (BX)(DX*1), DI
15695  	MOVL  SI, 20(SP)
15696  	MOVQ  $0x00cf1bbcdcbfa563, R9
15697  	MOVQ  $0x9e3779b1, SI
15698  	MOVQ  DI, R10
15699  	MOVQ  DI, R11
15700  	SHLQ  $0x08, R10
15701  	IMULQ R9, R10
15702  	SHRQ  $0x30, R10
15703  	SHLQ  $0x20, R11
15704  	IMULQ SI, R11
15705  	SHRQ  $0x33, R11
15706  	MOVL  (AX)(R10*4), SI
15707  	MOVL  262144(AX)(R11*4), R8
15708  	MOVL  DX, (AX)(R10*4)
15709  	MOVL  DX, 262144(AX)(R11*4)
15710  	MOVQ  (BX)(SI*1), R10
15711  	MOVQ  (BX)(R8*1), R11
15712  	CMPQ  R10, DI
15713  	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
15714  	CMPQ  R11, DI
15715  	JNE   no_short_found_encodeSnappyBetterBlockAsm64K
15716  	MOVL  R8, SI
15717  	JMP   candidate_match_encodeSnappyBetterBlockAsm64K
15718  
15719  no_short_found_encodeSnappyBetterBlockAsm64K:
15720  	CMPL R10, DI
15721  	JEQ  candidate_match_encodeSnappyBetterBlockAsm64K
15722  	CMPL R11, DI
15723  	JEQ  candidateS_match_encodeSnappyBetterBlockAsm64K
15724  	MOVL 20(SP), DX
15725  	JMP  search_loop_encodeSnappyBetterBlockAsm64K
15726  
15727  candidateS_match_encodeSnappyBetterBlockAsm64K:
15728  	SHRQ  $0x08, DI
15729  	MOVQ  DI, R10
15730  	SHLQ  $0x08, R10
15731  	IMULQ R9, R10
15732  	SHRQ  $0x30, R10
15733  	MOVL  (AX)(R10*4), SI
15734  	INCL  DX
15735  	MOVL  DX, (AX)(R10*4)
15736  	CMPL  (BX)(SI*1), DI
15737  	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
15738  	DECL  DX
15739  	MOVL  R8, SI
15740  
15741  candidate_match_encodeSnappyBetterBlockAsm64K:
15742  	MOVL  12(SP), DI
15743  	TESTL SI, SI
15744  	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm64K
15745  
15746  match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
15747  	CMPL DX, DI
15748  	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
15749  	MOVB -1(BX)(SI*1), R8
15750  	MOVB -1(BX)(DX*1), R9
15751  	CMPB R8, R9
15752  	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
15753  	LEAL -1(DX), DX
15754  	DECL SI
15755  	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm64K
15756  	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm64K
15757  
15758  match_extend_back_end_encodeSnappyBetterBlockAsm64K:
15759  	MOVL DX, DI
15760  	SUBL 12(SP), DI
15761  	LEAQ 3(CX)(DI*1), DI
15762  	CMPQ DI, (SP)
15763  	JB   match_dst_size_check_encodeSnappyBetterBlockAsm64K
15764  	MOVQ $0x00000000, ret+56(FP)
15765  	RET
15766  
15767  match_dst_size_check_encodeSnappyBetterBlockAsm64K:
15768  	MOVL DX, DI
15769  	ADDL $0x04, DX
15770  	ADDL $0x04, SI
15771  	MOVQ src_len+32(FP), R8
15772  	SUBL DX, R8
15773  	LEAQ (BX)(DX*1), R9
15774  	LEAQ (BX)(SI*1), R10
15775  
15776  	// matchLen
15777  	XORL R12, R12
15778  
15779  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K:
15780  	CMPL R8, $0x10
15781  	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K
15782  	MOVQ (R9)(R12*1), R11
15783  	MOVQ 8(R9)(R12*1), R13
15784  	XORQ (R10)(R12*1), R11
15785  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
15786  	XORQ 8(R10)(R12*1), R13
15787  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K
15788  	LEAL -16(R8), R8
15789  	LEAL 16(R12), R12
15790  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K
15791  
15792  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K:
15793  #ifdef GOAMD64_v3
15794  	TZCNTQ R13, R13
15795  
15796  #else
15797  	BSFQ R13, R13
15798  
15799  #endif
15800  	SARQ $0x03, R13
15801  	LEAL 8(R12)(R13*1), R12
15802  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
15803  
15804  matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K:
15805  	CMPL R8, $0x08
15806  	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
15807  	MOVQ (R9)(R12*1), R11
15808  	XORQ (R10)(R12*1), R11
15809  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
15810  	LEAL -8(R8), R8
15811  	LEAL 8(R12), R12
15812  	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
15813  
15814  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K:
15815  #ifdef GOAMD64_v3
15816  	TZCNTQ R11, R11
15817  
15818  #else
15819  	BSFQ R11, R11
15820  
15821  #endif
15822  	SARQ $0x03, R11
15823  	LEAL (R12)(R11*1), R12
15824  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
15825  
15826  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
15827  	CMPL R8, $0x04
15828  	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
15829  	MOVL (R9)(R12*1), R11
15830  	CMPL (R10)(R12*1), R11
15831  	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
15832  	LEAL -4(R8), R8
15833  	LEAL 4(R12), R12
15834  
15835  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
15836  	CMPL R8, $0x01
15837  	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
15838  	JB   match_nolit_end_encodeSnappyBetterBlockAsm64K
15839  	MOVW (R9)(R12*1), R11
15840  	CMPW (R10)(R12*1), R11
15841  	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
15842  	LEAL 2(R12), R12
15843  	SUBL $0x02, R8
15844  	JZ   match_nolit_end_encodeSnappyBetterBlockAsm64K
15845  
15846  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
15847  	MOVB (R9)(R12*1), R11
15848  	CMPB (R10)(R12*1), R11
15849  	JNE  match_nolit_end_encodeSnappyBetterBlockAsm64K
15850  	LEAL 1(R12), R12
15851  
15852  match_nolit_end_encodeSnappyBetterBlockAsm64K:
15853  	MOVL DX, R8
15854  	SUBL SI, R8
15855  
15856  	// Check if repeat
15857  	MOVL R8, 16(SP)
15858  	MOVL 12(SP), SI
15859  	CMPL SI, DI
15860  	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
15861  	MOVL DI, R9
15862  	MOVL DI, 12(SP)
15863  	LEAQ (BX)(SI*1), R10
15864  	SUBL SI, R9
15865  	LEAL -1(R9), SI
15866  	CMPL SI, $0x3c
15867  	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm64K
15868  	CMPL SI, $0x00000100
15869  	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
15870  	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm64K
15871  
15872  three_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
15873  	MOVB $0xf4, (CX)
15874  	MOVW SI, 1(CX)
15875  	ADDQ $0x03, CX
15876  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
15877  
15878  two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
15879  	MOVB $0xf0, (CX)
15880  	MOVB SI, 1(CX)
15881  	ADDQ $0x02, CX
15882  	CMPL SI, $0x40
15883  	JB   memmove_match_emit_encodeSnappyBetterBlockAsm64K
15884  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
15885  
15886  one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
15887  	SHLB $0x02, SI
15888  	MOVB SI, (CX)
15889  	ADDQ $0x01, CX
15890  
15891  memmove_match_emit_encodeSnappyBetterBlockAsm64K:
15892  	LEAQ (CX)(R9*1), SI
15893  
15894  	// genMemMoveShort
15895  	CMPQ R9, $0x08
15896  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
15897  	CMPQ R9, $0x10
15898  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
15899  	CMPQ R9, $0x20
15900  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
15901  	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
15902  
15903  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
15904  	MOVQ (R10), R11
15905  	MOVQ R11, (CX)
15906  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
15907  
15908  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
15909  	MOVQ (R10), R11
15910  	MOVQ -8(R10)(R9*1), R10
15911  	MOVQ R11, (CX)
15912  	MOVQ R10, -8(CX)(R9*1)
15913  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
15914  
15915  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
15916  	MOVOU (R10), X0
15917  	MOVOU -16(R10)(R9*1), X1
15918  	MOVOU X0, (CX)
15919  	MOVOU X1, -16(CX)(R9*1)
15920  	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
15921  
15922  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
15923  	MOVOU (R10), X0
15924  	MOVOU 16(R10), X1
15925  	MOVOU -32(R10)(R9*1), X2
15926  	MOVOU -16(R10)(R9*1), X3
15927  	MOVOU X0, (CX)
15928  	MOVOU X1, 16(CX)
15929  	MOVOU X2, -32(CX)(R9*1)
15930  	MOVOU X3, -16(CX)(R9*1)
15931  
15932  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
15933  	MOVQ SI, CX
15934  	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
15935  
15936  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
15937  	LEAQ (CX)(R9*1), SI
15938  
15939  	// genMemMoveLong
15940  	MOVOU (R10), X0
15941  	MOVOU 16(R10), X1
15942  	MOVOU -32(R10)(R9*1), X2
15943  	MOVOU -16(R10)(R9*1), X3
15944  	MOVQ  R9, R13
15945  	SHRQ  $0x05, R13
15946  	MOVQ  CX, R11
15947  	ANDL  $0x0000001f, R11
15948  	MOVQ  $0x00000040, R14
15949  	SUBQ  R11, R14
15950  	DECQ  R13
15951  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
15952  	LEAQ  -32(R10)(R14*1), R11
15953  	LEAQ  -32(CX)(R14*1), R15
15954  
15955  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
15956  	MOVOU (R11), X4
15957  	MOVOU 16(R11), X5
15958  	MOVOA X4, (R15)
15959  	MOVOA X5, 16(R15)
15960  	ADDQ  $0x20, R15
15961  	ADDQ  $0x20, R11
15962  	ADDQ  $0x20, R14
15963  	DECQ  R13
15964  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
15965  
15966  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
15967  	MOVOU -32(R10)(R14*1), X4
15968  	MOVOU -16(R10)(R14*1), X5
15969  	MOVOA X4, -32(CX)(R14*1)
15970  	MOVOA X5, -16(CX)(R14*1)
15971  	ADDQ  $0x20, R14
15972  	CMPQ  R9, R14
15973  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
15974  	MOVOU X0, (CX)
15975  	MOVOU X1, 16(CX)
15976  	MOVOU X2, -32(CX)(R9*1)
15977  	MOVOU X3, -16(CX)(R9*1)
15978  	MOVQ  SI, CX
15979  
15980  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
15981  	ADDL R12, DX
15982  	ADDL $0x04, R12
15983  	MOVL DX, 12(SP)
15984  
15985  	// emitCopy
15986  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
15987  	CMPL R12, $0x40
15988  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
15989  	MOVB $0xee, (CX)
15990  	MOVW R8, 1(CX)
15991  	LEAL -60(R12), R12
15992  	ADDQ $0x03, CX
15993  	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
15994  
15995  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
15996  	MOVL R12, SI
15997  	SHLL $0x02, SI
15998  	CMPL R12, $0x0c
15999  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
16000  	CMPL R8, $0x00000800
16001  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
16002  	LEAL -15(SI), SI
16003  	MOVB R8, 1(CX)
16004  	SHRL $0x08, R8
16005  	SHLL $0x05, R8
16006  	ORL  R8, SI
16007  	MOVB SI, (CX)
16008  	ADDQ $0x02, CX
16009  	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
16010  
16011  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
16012  	LEAL -2(SI), SI
16013  	MOVB SI, (CX)
16014  	MOVW R8, 1(CX)
16015  	ADDQ $0x03, CX
16016  
16017  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
16018  	CMPL DX, 8(SP)
16019  	JAE  emit_remainder_encodeSnappyBetterBlockAsm64K
16020  	CMPQ CX, (SP)
16021  	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
16022  	MOVQ $0x00000000, ret+56(FP)
16023  	RET
16024  
16025  match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
16026  	MOVQ  $0x00cf1bbcdcbfa563, SI
16027  	MOVQ  $0x9e3779b1, R8
16028  	LEAQ  1(DI), DI
16029  	LEAQ  -2(DX), R9
16030  	MOVQ  (BX)(DI*1), R10
16031  	MOVQ  1(BX)(DI*1), R11
16032  	MOVQ  (BX)(R9*1), R12
16033  	MOVQ  1(BX)(R9*1), R13
16034  	SHLQ  $0x08, R10
16035  	IMULQ SI, R10
16036  	SHRQ  $0x30, R10
16037  	SHLQ  $0x20, R11
16038  	IMULQ R8, R11
16039  	SHRQ  $0x33, R11
16040  	SHLQ  $0x08, R12
16041  	IMULQ SI, R12
16042  	SHRQ  $0x30, R12
16043  	SHLQ  $0x20, R13
16044  	IMULQ R8, R13
16045  	SHRQ  $0x33, R13
16046  	LEAQ  1(DI), R8
16047  	LEAQ  1(R9), R14
16048  	MOVL  DI, (AX)(R10*4)
16049  	MOVL  R9, (AX)(R12*4)
16050  	MOVL  R8, 262144(AX)(R11*4)
16051  	MOVL  R14, 262144(AX)(R13*4)
16052  	LEAQ  1(R9)(DI*1), R8
16053  	SHRQ  $0x01, R8
16054  	ADDQ  $0x01, DI
16055  	SUBQ  $0x01, R9
16056  
16057  index_loop_encodeSnappyBetterBlockAsm64K:
16058  	CMPQ  R8, R9
16059  	JAE   search_loop_encodeSnappyBetterBlockAsm64K
16060  	MOVQ  (BX)(DI*1), R10
16061  	MOVQ  (BX)(R8*1), R11
16062  	SHLQ  $0x08, R10
16063  	IMULQ SI, R10
16064  	SHRQ  $0x30, R10
16065  	SHLQ  $0x08, R11
16066  	IMULQ SI, R11
16067  	SHRQ  $0x30, R11
16068  	MOVL  DI, (AX)(R10*4)
16069  	MOVL  R8, (AX)(R11*4)
16070  	ADDQ  $0x02, DI
16071  	ADDQ  $0x02, R8
16072  	JMP   index_loop_encodeSnappyBetterBlockAsm64K
16073  
16074  emit_remainder_encodeSnappyBetterBlockAsm64K:
16075  	MOVQ src_len+32(FP), AX
16076  	SUBL 12(SP), AX
16077  	LEAQ 3(CX)(AX*1), AX
16078  	CMPQ AX, (SP)
16079  	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm64K
16080  	MOVQ $0x00000000, ret+56(FP)
16081  	RET
16082  
16083  emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
16084  	MOVQ src_len+32(FP), AX
16085  	MOVL 12(SP), DX
16086  	CMPL DX, AX
16087  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
16088  	MOVL AX, SI
16089  	MOVL AX, 12(SP)
16090  	LEAQ (BX)(DX*1), AX
16091  	SUBL DX, SI
16092  	LEAL -1(SI), DX
16093  	CMPL DX, $0x3c
16094  	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
16095  	CMPL DX, $0x00000100
16096  	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
16097  	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
16098  
16099  three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
16100  	MOVB $0xf4, (CX)
16101  	MOVW DX, 1(CX)
16102  	ADDQ $0x03, CX
16103  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
16104  
16105  two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
16106  	MOVB $0xf0, (CX)
16107  	MOVB DL, 1(CX)
16108  	ADDQ $0x02, CX
16109  	CMPL DX, $0x40
16110  	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
16111  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
16112  
16113  one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
16114  	SHLB $0x02, DL
16115  	MOVB DL, (CX)
16116  	ADDQ $0x01, CX
16117  
16118  memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
16119  	LEAQ (CX)(SI*1), DX
16120  	MOVL SI, BX
16121  
16122  	// genMemMoveShort
16123  	CMPQ BX, $0x03
16124  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2
16125  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3
16126  	CMPQ BX, $0x08
16127  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7
16128  	CMPQ BX, $0x10
16129  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
16130  	CMPQ BX, $0x20
16131  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
16132  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
16133  
16134  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2:
16135  	MOVB (AX), SI
16136  	MOVB -1(AX)(BX*1), AL
16137  	MOVB SI, (CX)
16138  	MOVB AL, -1(CX)(BX*1)
16139  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
16140  
16141  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3:
16142  	MOVW (AX), SI
16143  	MOVB 2(AX), AL
16144  	MOVW SI, (CX)
16145  	MOVB AL, 2(CX)
16146  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
16147  
16148  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7:
16149  	MOVL (AX), SI
16150  	MOVL -4(AX)(BX*1), AX
16151  	MOVL SI, (CX)
16152  	MOVL AX, -4(CX)(BX*1)
16153  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
16154  
16155  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
16156  	MOVQ (AX), SI
16157  	MOVQ -8(AX)(BX*1), AX
16158  	MOVQ SI, (CX)
16159  	MOVQ AX, -8(CX)(BX*1)
16160  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
16161  
16162  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
16163  	MOVOU (AX), X0
16164  	MOVOU -16(AX)(BX*1), X1
16165  	MOVOU X0, (CX)
16166  	MOVOU X1, -16(CX)(BX*1)
16167  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
16168  
16169  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
16170  	MOVOU (AX), X0
16171  	MOVOU 16(AX), X1
16172  	MOVOU -32(AX)(BX*1), X2
16173  	MOVOU -16(AX)(BX*1), X3
16174  	MOVOU X0, (CX)
16175  	MOVOU X1, 16(CX)
16176  	MOVOU X2, -32(CX)(BX*1)
16177  	MOVOU X3, -16(CX)(BX*1)
16178  
16179  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
16180  	MOVQ DX, CX
16181  	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
16182  
16183  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
16184  	LEAQ (CX)(SI*1), DX
16185  	MOVL SI, BX
16186  
16187  	// genMemMoveLong
16188  	MOVOU (AX), X0
16189  	MOVOU 16(AX), X1
16190  	MOVOU -32(AX)(BX*1), X2
16191  	MOVOU -16(AX)(BX*1), X3
16192  	MOVQ  BX, DI
16193  	SHRQ  $0x05, DI
16194  	MOVQ  CX, SI
16195  	ANDL  $0x0000001f, SI
16196  	MOVQ  $0x00000040, R8
16197  	SUBQ  SI, R8
16198  	DECQ  DI
16199  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
16200  	LEAQ  -32(AX)(R8*1), SI
16201  	LEAQ  -32(CX)(R8*1), R9
16202  
16203  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
16204  	MOVOU (SI), X4
16205  	MOVOU 16(SI), X5
16206  	MOVOA X4, (R9)
16207  	MOVOA X5, 16(R9)
16208  	ADDQ  $0x20, R9
16209  	ADDQ  $0x20, SI
16210  	ADDQ  $0x20, R8
16211  	DECQ  DI
16212  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
16213  
16214  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
16215  	MOVOU -32(AX)(R8*1), X4
16216  	MOVOU -16(AX)(R8*1), X5
16217  	MOVOA X4, -32(CX)(R8*1)
16218  	MOVOA X5, -16(CX)(R8*1)
16219  	ADDQ  $0x20, R8
16220  	CMPQ  BX, R8
16221  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
16222  	MOVOU X0, (CX)
16223  	MOVOU X1, 16(CX)
16224  	MOVOU X2, -32(CX)(BX*1)
16225  	MOVOU X3, -16(CX)(BX*1)
16226  	MOVQ  DX, CX
16227  
16228  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
16229  	MOVQ dst_base+0(FP), AX
16230  	SUBQ AX, CX
16231  	MOVQ CX, ret+56(FP)
16232  	RET
16233  
16234  // func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
16235  // Requires: BMI, SSE2
//
// Encodes src into dst as a sequence of literal and copy elements, using tmp
// as hash-table scratch space. Returns the number of bytes written to dst, or
// 0 when the output pointer would reach the destination limit computed below.
//
// tmp layout, as indexed by this routine (entries are 4-byte src offsets):
//   tmp+0      table indexed by a 14-bit hash of 6 source bytes (65536 bytes)
//   tmp+65536  table indexed by a 12-bit hash of 4 source bytes (16384 bytes)
// All 81920 bytes are zeroed on entry.
//
// Register roles in the main loop:
//   AX = tmp base, BX = src base, CX = dst write pointer, DX = current src offset
// Stack slots:
//   0(SP)   dst limit = dst_base + src_len - 9 - (src_len >> 5)
//   8(SP)   search limit = src_len - 8
//   12(SP)  nextEmit: offset of the first src byte not yet emitted
//   16(SP)  offset (distance) of the most recent match; written here, not read
//   20(SP)  next src offset to try when the current one yields no match
//
// NOTE(review): 8-byte loads are issued at src offsets and short literal copies
// may move more bytes than the literal length; presumably the Go caller
// guarantees a minimum src length and sufficient dst capacity - confirm.
16236  TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64
16237  	MOVQ tmp+48(FP), AX // AX = tmp base
16238  	MOVQ dst_base+0(FP), CX // CX = dst write pointer
16239  	MOVQ $0x00000280, DX // 0x280 iterations * 128 bytes = 81920 bytes to clear
16240  	MOVQ AX, BX
16241  	PXOR X0, X0
16242  
16243  zero_loop_encodeSnappyBetterBlockAsm12B: // zero tmp, 128 bytes per iteration
16244  	MOVOU X0, (BX)
16245  	MOVOU X0, 16(BX)
16246  	MOVOU X0, 32(BX)
16247  	MOVOU X0, 48(BX)
16248  	MOVOU X0, 64(BX)
16249  	MOVOU X0, 80(BX)
16250  	MOVOU X0, 96(BX)
16251  	MOVOU X0, 112(BX)
16252  	ADDQ  $0x80, BX
16253  	DECQ  DX
16254  	JNZ   zero_loop_encodeSnappyBetterBlockAsm12B
16255  	MOVL  $0x00000000, 12(SP) // nextEmit = 0
16256  	MOVQ  src_len+32(FP), DX
16257  	LEAQ  -9(DX), BX
16258  	LEAQ  -8(DX), SI
16259  	MOVL  SI, 8(SP) // search limit = src_len - 8
16260  	SHRQ  $0x05, DX
16261  	SUBL  DX, BX // BX = src_len - 9 - (src_len >> 5)
16262  	LEAQ  (CX)(BX*1), BX
16263  	MOVQ  BX, (SP) // dst limit
16264  	MOVL  $0x00000001, DX // searching starts at src offset 1
16265  	MOVL  $0x00000000, 16(SP) // no previous match offset yet
16266  	MOVQ  src_base+24(FP), BX // BX = src base
16267  
16268  search_loop_encodeSnappyBetterBlockAsm12B: // look for a match at src offset DX
16269  	MOVL  DX, SI
16270  	SUBL  12(SP), SI
16271  	SHRL  $0x06, SI
16272  	LEAL  1(DX)(SI*1), SI // SI = DX + 1 + ((DX - nextEmit) >> 6); the step grows with the unmatched run
16273  	CMPL  SI, 8(SP)
16274  	JAE   emit_remainder_encodeSnappyBetterBlockAsm12B // next position reaches the search limit
16275  	MOVQ  (BX)(DX*1), DI // DI = 8 bytes at src[DX]
16276  	MOVL  SI, 20(SP) // remember the next position
16277  	MOVQ  $0x0000cf1bbcdcbf9b, R9 // multiplier for the 6-byte hash
16278  	MOVQ  $0x9e3779b1, SI // multiplier for the 4-byte hash
16279  	MOVQ  DI, R10
16280  	MOVQ  DI, R11
16281  	SHLQ  $0x10, R10 // discard the top 2 bytes so the hash covers 6 bytes
16282  	IMULQ R9, R10
16283  	SHRQ  $0x32, R10 // keep the top 14 bits as table index
16284  	SHLQ  $0x20, R11 // discard the top 4 bytes so the hash covers 4 bytes
16285  	IMULQ SI, R11
16286  	SHRQ  $0x34, R11 // keep the top 12 bits as table index
16287  	MOVL  (AX)(R10*4), SI // SI = candidate from the 6-byte table
16288  	MOVL  65536(AX)(R11*4), R8 // R8 = candidate from the 4-byte table
16289  	MOVL  DX, (AX)(R10*4) // record the current offset in both tables
16290  	MOVL  DX, 65536(AX)(R11*4)
16291  	MOVQ  (BX)(SI*1), R10 // R10 = 8 bytes at the 6-byte table candidate
16292  	MOVQ  (BX)(R8*1), R11 // R11 = 8 bytes at the 4-byte table candidate
16293  	CMPQ  R10, DI
16294  	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B // 8 bytes equal at SI
16295  	CMPQ  R11, DI
16296  	JNE   no_short_found_encodeSnappyBetterBlockAsm12B
16297  	MOVL  R8, SI // 8 bytes equal at the 4-byte table candidate
16298  	JMP   candidate_match_encodeSnappyBetterBlockAsm12B
16299  
16300  no_short_found_encodeSnappyBetterBlockAsm12B: // no 8-byte match; retry with 4-byte compares
16301  	CMPL R10, DI
16302  	JEQ  candidate_match_encodeSnappyBetterBlockAsm12B // 4 bytes equal at SI
16303  	CMPL R11, DI
16304  	JEQ  candidateS_match_encodeSnappyBetterBlockAsm12B // 4 bytes equal at R8
16305  	MOVL 20(SP), DX // no match: advance to the precomputed next position
16306  	JMP  search_loop_encodeSnappyBetterBlockAsm12B
16307  
16308  candidateS_match_encodeSnappyBetterBlockAsm12B: // before using R8, probe the 6-byte table at DX+1
16309  	SHRQ  $0x08, DI // DI now holds the bytes starting at src[DX+1]
16310  	MOVQ  DI, R10
16311  	SHLQ  $0x10, R10
16312  	IMULQ R9, R10
16313  	SHRQ  $0x32, R10
16314  	MOVL  (AX)(R10*4), SI // SI = 6-byte table candidate for offset DX+1
16315  	INCL  DX
16316  	MOVL  DX, (AX)(R10*4) // record DX+1 in the 6-byte table
16317  	CMPL  (BX)(SI*1), DI
16318  	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B // 4 bytes equal: match starts at DX+1
16319  	DECL  DX // otherwise restore DX and use the 4-byte table candidate
16320  	MOVL  R8, SI
16321  
16322  candidate_match_encodeSnappyBetterBlockAsm12B: // SI = candidate offset, DX = current offset
16323  	MOVL  12(SP), DI // DI = nextEmit
16324  	TESTL SI, SI
16325  	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm12B // candidate at offset 0 has no preceding byte
16326  
16327  match_extend_back_loop_encodeSnappyBetterBlockAsm12B: // grow the match backwards while the preceding bytes agree
16328  	CMPL DX, DI
16329  	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm12B // never move before nextEmit
16330  	MOVB -1(BX)(SI*1), R8
16331  	MOVB -1(BX)(DX*1), R9
16332  	CMPB R8, R9
16333  	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm12B
16334  	LEAL -1(DX), DX
16335  	DECL SI
16336  	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm12B // candidate reached offset 0
16337  	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm12B
16338  
16339  match_extend_back_end_encodeSnappyBetterBlockAsm12B: // check dst space for the pending literals
16340  	MOVL DX, DI
16341  	SUBL 12(SP), DI // DI = pending literal length (DX - nextEmit)
16342  	LEAQ 3(CX)(DI*1), DI // dst pointer after the literals plus 3 bytes
16343  	CMPQ DI, (SP)
16344  	JB   match_dst_size_check_encodeSnappyBetterBlockAsm12B
16345  	MOVQ $0x00000000, ret+56(FP) // dst limit reached: return 0
16346  	RET
16347  
16348  match_dst_size_check_encodeSnappyBetterBlockAsm12B:
16349  	MOVL DX, DI // DI = src offset where the match starts
16350  	ADDL $0x04, DX // every path here has verified at least 4 equal bytes; skip them
16351  	ADDL $0x04, SI
16352  	MOVQ src_len+32(FP), R8
16353  	SUBL DX, R8 // R8 = src bytes remaining after DX
16354  	LEAQ (BX)(DX*1), R9 // R9 = &src[DX]
16355  	LEAQ (BX)(SI*1), R10 // R10 = &src[SI]
16356  
16357  	// matchLen: R12 = count of equal bytes at R9 and R10, limited by R8
16358  	XORL R12, R12
16359  
16360  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: // compare 16 bytes per iteration
16361  	CMPL R8, $0x10
16362  	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B
16363  	MOVQ (R9)(R12*1), R11
16364  	MOVQ 8(R9)(R12*1), R13
16365  	XORQ (R10)(R12*1), R11
16366  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B // difference in the first 8 bytes
16367  	XORQ 8(R10)(R12*1), R13
16368  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B // difference in the second 8 bytes
16369  	LEAL -16(R8), R8
16370  	LEAL 16(R12), R12
16371  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B
16372  
16373  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: // lowest set bit of the XOR, divided by 8, is the count of equal bytes
16374  #ifdef GOAMD64_v3
16375  	TZCNTQ R13, R13
16376  
16377  #else
16378  	BSFQ R13, R13
16379  
16380  #endif
16381  	SARQ $0x03, R13
16382  	LEAL 8(R12)(R13*1), R12 // add the 8 bytes that matched before this word
16383  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
16384  
16385  matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: // fewer than 16 bytes left: try one 8-byte compare
16386  	CMPL R8, $0x08
16387  	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
16388  	MOVQ (R9)(R12*1), R11
16389  	XORQ (R10)(R12*1), R11
16390  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
16391  	LEAL -8(R8), R8
16392  	LEAL 8(R12), R12
16393  	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
16394  
16395  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: // same bit-scan as above for the first 8-byte word
16396  #ifdef GOAMD64_v3
16397  	TZCNTQ R11, R11
16398  
16399  #else
16400  	BSFQ R11, R11
16401  
16402  #endif
16403  	SARQ $0x03, R11
16404  	LEAL (R12)(R11*1), R12
16405  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
16406  
16407  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: // fewer than 8 bytes left: try 4 bytes
16408  	CMPL R8, $0x04
16409  	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
16410  	MOVL (R9)(R12*1), R11
16411  	CMPL (R10)(R12*1), R11
16412  	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
16413  	LEAL -4(R8), R8
16414  	LEAL 4(R12), R12
16415  
16416  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: // then 2 bytes, or a single byte when exactly 1 is left
16417  	CMPL R8, $0x01
16418  	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
16419  	JB   match_nolit_end_encodeSnappyBetterBlockAsm12B // nothing left to compare
16420  	MOVW (R9)(R12*1), R11
16421  	CMPW (R10)(R12*1), R11
16422  	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
16423  	LEAL 2(R12), R12
16424  	SUBL $0x02, R8
16425  	JZ   match_nolit_end_encodeSnappyBetterBlockAsm12B
16426  
16427  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: // final single-byte compare
16428  	MOVB (R9)(R12*1), R11
16429  	CMPB (R10)(R12*1), R11
16430  	JNE  match_nolit_end_encodeSnappyBetterBlockAsm12B
16431  	LEAL 1(R12), R12
16432  
16433  match_nolit_end_encodeSnappyBetterBlockAsm12B: // R12 = equal bytes beyond the first 4
16434  	MOVL DX, R8
16435  	SUBL SI, R8 // R8 = match offset (distance back to the candidate)
16436  
16437  	// Check if repeat
16438  	MOVL R8, 16(SP) // store the offset of this match
16439  	MOVL 12(SP), SI // SI = nextEmit
16440  	CMPL SI, DI
16441  	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B // no pending literals before the match
16442  	MOVL DI, R9
16443  	MOVL DI, 12(SP)
16444  	LEAQ (BX)(SI*1), R10 // R10 = &src[nextEmit], start of the literals
16445  	SUBL SI, R9 // R9 = literal length
16446  	LEAL -1(R9), SI // SI = length - 1, the value stored in the literal header
16447  	CMPL SI, $0x3c
16448  	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm12B // length-1 < 60: single tag byte
16449  	CMPL SI, $0x00000100
16450  	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm12B // length-1 < 256: tag plus 1 length byte
16451  	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm12B // never taken (same condition as above); execution falls through
16452  
16453  three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: // header: 0xf4 followed by 16-bit length-1
16454  	MOVB $0xf4, (CX)
16455  	MOVW SI, 1(CX)
16456  	ADDQ $0x03, CX
16457  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
16458  
16459  two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: // header: 0xf0 followed by 8-bit length-1
16460  	MOVB $0xf0, (CX)
16461  	MOVB SI, 1(CX)
16462  	ADDQ $0x02, CX
16463  	CMPL SI, $0x40
16464  	JB   memmove_match_emit_encodeSnappyBetterBlockAsm12B // at most 64 bytes: short copy
16465  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
16466  
16467  one_byte_match_emit_encodeSnappyBetterBlockAsm12B: // header: (length-1) << 2
16468  	SHLB $0x02, SI
16469  	MOVB SI, (CX)
16470  	ADDQ $0x01, CX
16471  
16472  memmove_match_emit_encodeSnappyBetterBlockAsm12B: // copy R9 literal bytes from R10 to CX
16473  	LEAQ (CX)(R9*1), SI // SI = dst pointer after the literals
16474  
16475  	// genMemMoveShort: pick a copy routine by length; head and tail moves may overlap
16476  	CMPQ R9, $0x08
16477  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
16478  	CMPQ R9, $0x10
16479  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
16480  	CMPQ R9, $0x20
16481  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
16482  	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
16483  
16484  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: // length <= 8: always moves a full 8 bytes
16485  	MOVQ (R10), R11
16486  	MOVQ R11, (CX)
16487  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
16488  
16489  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: // first 8 and last 8 bytes
16490  	MOVQ (R10), R11
16491  	MOVQ -8(R10)(R9*1), R10
16492  	MOVQ R11, (CX)
16493  	MOVQ R10, -8(CX)(R9*1)
16494  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
16495  
16496  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: // first 16 and last 16 bytes
16497  	MOVOU (R10), X0
16498  	MOVOU -16(R10)(R9*1), X1
16499  	MOVOU X0, (CX)
16500  	MOVOU X1, -16(CX)(R9*1)
16501  	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
16502  
16503  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: // first 32 and last 32 bytes
16504  	MOVOU (R10), X0
16505  	MOVOU 16(R10), X1
16506  	MOVOU -32(R10)(R9*1), X2
16507  	MOVOU -16(R10)(R9*1), X3
16508  	MOVOU X0, (CX)
16509  	MOVOU X1, 16(CX)
16510  	MOVOU X2, -32(CX)(R9*1)
16511  	MOVOU X3, -16(CX)(R9*1)
16512  
16513  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
16514  	MOVQ SI, CX // advance dst by the exact literal length
16515  	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
16516  
16517  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: // literal copy for lengths of 65 bytes or more
16518  	LEAQ (CX)(R9*1), SI // SI = dst pointer after the literals
16519  
16520  	// genMemMoveLong: keep the first and last 32 bytes in X0-X3, copy the middle in 32-byte steps
16521  	MOVOU (R10), X0
16522  	MOVOU 16(R10), X1
16523  	MOVOU -32(R10)(R9*1), X2
16524  	MOVOU -16(R10)(R9*1), X3
16525  	MOVQ  R9, R13
16526  	SHRQ  $0x05, R13 // R13 = length / 32
16527  	MOVQ  CX, R11
16528  	ANDL  $0x0000001f, R11 // R11 = dst misalignment within 32 bytes
16529  	MOVQ  $0x00000040, R14
16530  	SUBQ  R11, R14 // R14 = 64 - misalignment, so CX+R14-32 is 32-byte aligned
16531  	DECQ  R13 // DECQ leaves CF as set by the SUBQ above
16532  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
16533  	LEAQ  -32(R10)(R14*1), R11
16534  	LEAQ  -32(CX)(R14*1), R15
16535  
16536  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: // 32 bytes per pass through explicit pointers R11 -> R15
16537  	MOVOU (R11), X4
16538  	MOVOU 16(R11), X5
16539  	MOVOA X4, (R15)
16540  	MOVOA X5, 16(R15)
16541  	ADDQ  $0x20, R15
16542  	ADDQ  $0x20, R11
16543  	ADDQ  $0x20, R14
16544  	DECQ  R13
16545  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
16546  
16547  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: // 32 bytes per pass, aligned stores at CX+R14-32
16548  	MOVOU -32(R10)(R14*1), X4
16549  	MOVOU -16(R10)(R14*1), X5
16550  	MOVOA X4, -32(CX)(R14*1)
16551  	MOVOA X5, -16(CX)(R14*1)
16552  	ADDQ  $0x20, R14
16553  	CMPQ  R9, R14
16554  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
16555  	MOVOU X0, (CX) // finally write the saved head and tail with unaligned stores
16556  	MOVOU X1, 16(CX)
16557  	MOVOU X2, -32(CX)(R9*1)
16558  	MOVOU X3, -16(CX)(R9*1)
16559  	MOVQ  SI, CX
16560  
16561  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: // literals are out; now emit the copy
16562  	ADDL R12, DX // DX = src offset just past the match
16563  	ADDL $0x04, R12 // R12 = total match length including the first 4 bytes
16564  	MOVL DX, 12(SP) // nextEmit = end of match
16565  
16566  	// emitCopy: R12 = length, R8 = offset
16567  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: // while length > 64, emit 3-byte copies of 60 bytes
16568  	CMPL R12, $0x40
16569  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
16570  	MOVB $0xee, (CX) // 0xee = ((60-1) << 2) | 2
16571  	MOVW R8, 1(CX) // 16-bit offset
16572  	LEAL -60(R12), R12
16573  	ADDQ $0x03, CX
16574  	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
16575  
16576  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: // length <= 64: choose 2-byte or 3-byte encoding
16577  	MOVL R12, SI
16578  	SHLL $0x02, SI // SI = length << 2
16579  	CMPL R12, $0x0c
16580  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B // length >= 12 needs the 3-byte form
16581  	CMPL R8, $0x00000800
16582  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B // offset >= 2048 needs the 3-byte form
16583  	LEAL -15(SI), SI // (length << 2) - 15 == ((length - 4) << 2) | 1
16584  	MOVB R8, 1(CX) // low 8 bits of the offset
16585  	SHRL $0x08, R8
16586  	SHLL $0x05, R8
16587  	ORL  R8, SI // offset bits above the low byte go into tag bits 5 and up
16588  	MOVB SI, (CX)
16589  	ADDQ $0x02, CX
16590  	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
16591  
16592  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: // tag byte plus 16-bit offset
16593  	LEAL -2(SI), SI // (length << 2) - 2 == ((length - 1) << 2) | 2
16594  	MOVB SI, (CX)
16595  	MOVW R8, 1(CX)
16596  	ADDQ $0x03, CX
16597  
16598  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
16599  	CMPL DX, 8(SP)
16600  	JAE  emit_remainder_encodeSnappyBetterBlockAsm12B // past the search limit: flush the tail
16601  	CMPQ CX, (SP)
16602  	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
16603  	MOVQ $0x00000000, ret+56(FP) // dst limit reached: return 0
16604  	RET
16605  
16606  match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: // add positions inside the match to the hash tables
16607  	MOVQ  $0x0000cf1bbcdcbf9b, SI // 6-byte hash multiplier
16608  	MOVQ  $0x9e3779b1, R8 // 4-byte hash multiplier
16609  	LEAQ  1(DI), DI // DI = match start + 1
16610  	LEAQ  -2(DX), R9 // R9 = match end - 2
16611  	MOVQ  (BX)(DI*1), R10
16612  	MOVQ  1(BX)(DI*1), R11
16613  	MOVQ  (BX)(R9*1), R12
16614  	MOVQ  1(BX)(R9*1), R13
16615  	SHLQ  $0x10, R10
16616  	IMULQ SI, R10
16617  	SHRQ  $0x32, R10 // 6-byte hash at DI
16618  	SHLQ  $0x20, R11
16619  	IMULQ R8, R11
16620  	SHRQ  $0x34, R11 // 4-byte hash at DI+1
16621  	SHLQ  $0x10, R12
16622  	IMULQ SI, R12
16623  	SHRQ  $0x32, R12 // 6-byte hash at R9
16624  	SHLQ  $0x20, R13
16625  	IMULQ R8, R13
16626  	SHRQ  $0x34, R13 // 4-byte hash at R9+1
16627  	LEAQ  1(DI), R8
16628  	LEAQ  1(R9), R14
16629  	MOVL  DI, (AX)(R10*4)
16630  	MOVL  R9, (AX)(R12*4)
16631  	MOVL  R8, 65536(AX)(R11*4)
16632  	MOVL  R14, 65536(AX)(R13*4)
16633  	LEAQ  1(R9)(DI*1), R8
16634  	SHRQ  $0x01, R8 // R8 = midpoint between DI and R9 (rounded up)
16635  	ADDQ  $0x01, DI
16636  	SUBQ  $0x01, R9
16637  
16638  index_loop_encodeSnappyBetterBlockAsm12B: // index every second offset from DI and from R8 into the 6-byte table until R8 reaches R9
16639  	CMPQ  R8, R9
16640  	JAE   search_loop_encodeSnappyBetterBlockAsm12B // done: resume searching at DX
16641  	MOVQ  (BX)(DI*1), R10
16642  	MOVQ  (BX)(R8*1), R11
16643  	SHLQ  $0x10, R10
16644  	IMULQ SI, R10
16645  	SHRQ  $0x32, R10
16646  	SHLQ  $0x10, R11
16647  	IMULQ SI, R11
16648  	SHRQ  $0x32, R11
16649  	MOVL  DI, (AX)(R10*4)
16650  	MOVL  R8, (AX)(R11*4)
16651  	ADDQ  $0x02, DI
16652  	ADDQ  $0x02, R8
16653  	JMP   index_loop_encodeSnappyBetterBlockAsm12B
16654  
16655  emit_remainder_encodeSnappyBetterBlockAsm12B: // emit src[nextEmit:] as one literal; tmp is not accessed from here on, so AX is reused
16656  	MOVQ src_len+32(FP), AX
16657  	SUBL 12(SP), AX // AX = remaining literal length
16658  	LEAQ 3(CX)(AX*1), AX // dst pointer after the literals plus 3 bytes
16659  	CMPQ AX, (SP)
16660  	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm12B
16661  	MOVQ $0x00000000, ret+56(FP) // dst limit reached: return 0
16662  	RET
16663  
16664  emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
16665  	MOVQ src_len+32(FP), AX
16666  	MOVL 12(SP), DX // DX = nextEmit
16667  	CMPL DX, AX
16668  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B // nothing left to emit
16669  	MOVL AX, SI
16670  	MOVL AX, 12(SP)
16671  	LEAQ (BX)(DX*1), AX // AX = &src[nextEmit]
16672  	SUBL DX, SI // SI = literal length
16673  	LEAL -1(SI), DX // DX = length - 1 for the literal header
16674  	CMPL DX, $0x3c
16675  	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B // length-1 < 60: single tag byte
16676  	CMPL DX, $0x00000100
16677  	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B // length-1 < 256: tag plus 1 length byte
16678  	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B // never taken (same condition as above); execution falls through
16679  
16680  three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: // header: 0xf4 followed by 16-bit length-1
16681  	MOVB $0xf4, (CX)
16682  	MOVW DX, 1(CX)
16683  	ADDQ $0x03, CX
16684  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
16685  
16686  two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: // header: 0xf0 followed by 8-bit length-1
16687  	MOVB $0xf0, (CX)
16688  	MOVB DL, 1(CX)
16689  	ADDQ $0x02, CX
16690  	CMPL DX, $0x40
16691  	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm12B // at most 64 bytes: short copy
16692  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
16693  
16694  one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: // header: (length-1) << 2
16695  	SHLB $0x02, DL
16696  	MOVB DL, (CX)
16697  	ADDQ $0x01, CX
16698  
16699  memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: // copy SI literal bytes from AX to CX
16700  	LEAQ (CX)(SI*1), DX // DX = dst pointer after the literals
16701  	MOVL SI, BX // BX = length; the src base is no longer needed
16702  
16703  	// genMemMoveShort: exact-length copies; head and tail moves may overlap
16704  	CMPQ BX, $0x03
16705  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2
16706  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3
16707  	CMPQ BX, $0x08
16708  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7
16709  	CMPQ BX, $0x10
16710  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
16711  	CMPQ BX, $0x20
16712  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
16713  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
16714  
16715  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: // first byte and last byte
16716  	MOVB (AX), SI
16717  	MOVB -1(AX)(BX*1), AL
16718  	MOVB SI, (CX)
16719  	MOVB AL, -1(CX)(BX*1)
16720  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
16721  
16722  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: // 2 bytes then 1 byte
16723  	MOVW (AX), SI
16724  	MOVB 2(AX), AL
16725  	MOVW SI, (CX)
16726  	MOVB AL, 2(CX)
16727  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
16728  
16729  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: // first 4 and last 4 bytes
16730  	MOVL (AX), SI
16731  	MOVL -4(AX)(BX*1), AX
16732  	MOVL SI, (CX)
16733  	MOVL AX, -4(CX)(BX*1)
16734  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
16735  
16736  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: // first 8 and last 8 bytes
16737  	MOVQ (AX), SI
16738  	MOVQ -8(AX)(BX*1), AX
16739  	MOVQ SI, (CX)
16740  	MOVQ AX, -8(CX)(BX*1)
16741  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
16742  
16743  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: // first 16 and last 16 bytes
16744  	MOVOU (AX), X0
16745  	MOVOU -16(AX)(BX*1), X1
16746  	MOVOU X0, (CX)
16747  	MOVOU X1, -16(CX)(BX*1)
16748  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
16749  
16750  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: // first 32 and last 32 bytes
16751  	MOVOU (AX), X0
16752  	MOVOU 16(AX), X1
16753  	MOVOU -32(AX)(BX*1), X2
16754  	MOVOU -16(AX)(BX*1), X3
16755  	MOVOU X0, (CX)
16756  	MOVOU X1, 16(CX)
16757  	MOVOU X2, -32(CX)(BX*1)
16758  	MOVOU X3, -16(CX)(BX*1)
16759  
16760  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
16761  	MOVQ DX, CX // advance dst by the literal length
16762  	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
16763  
16764  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: // literal copy for lengths of 65 bytes or more
16765  	LEAQ (CX)(SI*1), DX // DX = dst pointer after the literals
16766  	MOVL SI, BX // BX = length; the src base is no longer needed
16767  
16768  	// genMemMoveLong: keep the first and last 32 bytes in X0-X3, copy the middle in 32-byte steps
16769  	MOVOU (AX), X0
16770  	MOVOU 16(AX), X1
16771  	MOVOU -32(AX)(BX*1), X2
16772  	MOVOU -16(AX)(BX*1), X3
16773  	MOVQ  BX, DI
16774  	SHRQ  $0x05, DI // DI = length / 32
16775  	MOVQ  CX, SI
16776  	ANDL  $0x0000001f, SI // SI = dst misalignment within 32 bytes
16777  	MOVQ  $0x00000040, R8
16778  	SUBQ  SI, R8 // R8 = 64 - misalignment, so CX+R8-32 is 32-byte aligned
16779  	DECQ  DI // DECQ leaves CF as set by the SUBQ above
16780  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
16781  	LEAQ  -32(AX)(R8*1), SI
16782  	LEAQ  -32(CX)(R8*1), R9
16783  
16784  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: // 32 bytes per pass through explicit pointers SI -> R9
16785  	MOVOU (SI), X4
16786  	MOVOU 16(SI), X5
16787  	MOVOA X4, (R9)
16788  	MOVOA X5, 16(R9)
16789  	ADDQ  $0x20, R9
16790  	ADDQ  $0x20, SI
16791  	ADDQ  $0x20, R8
16792  	DECQ  DI
16793  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
16794  
16795  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: // 32 bytes per pass, aligned stores at CX+R8-32
16796  	MOVOU -32(AX)(R8*1), X4
16797  	MOVOU -16(AX)(R8*1), X5
16798  	MOVOA X4, -32(CX)(R8*1)
16799  	MOVOA X5, -16(CX)(R8*1)
16800  	ADDQ  $0x20, R8
16801  	CMPQ  BX, R8
16802  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
16803  	MOVOU X0, (CX) // finally write the saved head and tail with unaligned stores
16804  	MOVOU X1, 16(CX)
16805  	MOVOU X2, -32(CX)(BX*1)
16806  	MOVOU X3, -16(CX)(BX*1)
16807  	MOVQ  DX, CX
16808  
16809  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: // return the number of bytes written
16810  	MOVQ dst_base+0(FP), AX
16811  	SUBQ AX, CX // CX = dst write pointer - dst_base
16812  	MOVQ CX, ret+56(FP)
16813  	RET
16814  
16815  // func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
16816  // Requires: BMI, SSE2
//
// Encodes src into dst and returns the number of bytes written to dst, or 0
// when the output would reach the dst limit computed on entry.
//
// tmp is cleared on entry and then used as two tables of 4-byte src offsets:
//   tmp[0:16384]     indexed by a 12-bit hash of 6 source bytes
//   tmp[16384:20480] indexed by a 10-bit hash of 4 source bytes
//
// Register roles in the main loop:
//   AX = tmp, BX = src base, CX = dst write pointer, DX = s (current src offset)
// Stack slots:
//   (SP)   dst limit pointer: dst + len(src) - 9 - len(src)/32
//   8(SP)  len(src) - 8, upper bound for search positions
//   12(SP) nextEmit: src offset of the first byte not yet written to dst
//   16(SP) offset of the most recent match (only written in this function)
//   20(SP) next search position, used when the current one yields no match
//
// NOTE(review): no lower bound on len(src) is checked before the first 8-byte
// load; presumably the Go caller guarantees a minimum input size - confirm.
16817  TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64
16818  	MOVQ tmp+48(FP), AX // AX = tmp, base of both hash tables
16819  	MOVQ dst_base+0(FP), CX // CX = dst write pointer
16820  	MOVQ $0x000000a0, DX // 160 iterations * 128 bytes = 20480 bytes
16821  	MOVQ AX, BX
16822  	PXOR X0, X0
16823  // Zero all of tmp, 128 bytes per iteration.
16824  zero_loop_encodeSnappyBetterBlockAsm10B:
16825  	MOVOU X0, (BX)
16826  	MOVOU X0, 16(BX)
16827  	MOVOU X0, 32(BX)
16828  	MOVOU X0, 48(BX)
16829  	MOVOU X0, 64(BX)
16830  	MOVOU X0, 80(BX)
16831  	MOVOU X0, 96(BX)
16832  	MOVOU X0, 112(BX)
16833  	ADDQ  $0x80, BX
16834  	DECQ  DX
16835  	JNZ   zero_loop_encodeSnappyBetterBlockAsm10B
16836  	MOVL  $0x00000000, 12(SP) // nextEmit = 0: nothing emitted yet
16837  	MOVQ  src_len+32(FP), DX
16838  	LEAQ  -9(DX), BX
16839  	LEAQ  -8(DX), SI
16840  	MOVL  SI, 8(SP) // 8(SP) = len(src) - 8, search limit
16841  	SHRQ  $0x05, DX
16842  	SUBL  DX, BX // BX = len(src) - 9 - len(src)/32
16843  	LEAQ  (CX)(BX*1), BX
16844  	MOVQ  BX, (SP) // (SP) = dst limit pointer
16845  	MOVL  $0x00000001, DX // s = 1: searching starts at the second byte
16846  	MOVL  $0x00000000, 16(SP) // last match offset = 0
16847  	MOVQ  src_base+24(FP), BX // BX = src base
16848  // Main search loop: look up s in both tables; the step grows with the distance from nextEmit.
16849  search_loop_encodeSnappyBetterBlockAsm10B:
16850  	MOVL  DX, SI
16851  	SUBL  12(SP), SI
16852  	SHRL  $0x05, SI
16853  	LEAL  1(DX)(SI*1), SI // SI = s + 1 + (s - nextEmit)>>5, the next position to try
16854  	CMPL  SI, 8(SP)
16855  	JAE   emit_remainder_encodeSnappyBetterBlockAsm10B // next position at or past the limit: flush the rest as literals
16856  	MOVQ  (BX)(DX*1), DI // DI = 8 source bytes at s
16857  	MOVL  SI, 20(SP) // save the next position
16858  	MOVQ  $0x0000cf1bbcdcbf9b, R9 // multiplier for the 6-byte hash
16859  	MOVQ  $0x9e3779b1, SI // multiplier for the 4-byte hash
16860  	MOVQ  DI, R10
16861  	MOVQ  DI, R11
16862  	SHLQ  $0x10, R10 // discard the top 2 bytes: hash covers 6 bytes
16863  	IMULQ R9, R10
16864  	SHRQ  $0x34, R10 // keep the top 12 bits as table index
16865  	SHLQ  $0x20, R11 // discard the top 4 bytes: hash covers 4 bytes
16866  	IMULQ SI, R11
16867  	SHRQ  $0x36, R11 // keep the top 10 bits as table index
16868  	MOVL  (AX)(R10*4), SI // SI = candidate from the 6-byte-hash table
16869  	MOVL  16384(AX)(R11*4), R8 // R8 = candidate from the 4-byte-hash table
16870  	MOVL  DX, (AX)(R10*4) // record s in both tables
16871  	MOVL  DX, 16384(AX)(R11*4)
16872  	MOVQ  (BX)(SI*1), R10 // 8 bytes at each candidate
16873  	MOVQ  (BX)(R8*1), R11
16874  	CMPQ  R10, DI
16875  	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B // 8 bytes equal at the 6-byte-table candidate
16876  	CMPQ  R11, DI
16877  	JNE   no_short_found_encodeSnappyBetterBlockAsm10B
16878  	MOVL  R8, SI // 8 bytes equal at the 4-byte-table candidate: use it
16879  	JMP   candidate_match_encodeSnappyBetterBlockAsm10B
16880  // No 8-byte match: accept a match on the low 4 bytes at either candidate.
16881  no_short_found_encodeSnappyBetterBlockAsm10B:
16882  	CMPL R10, DI
16883  	JEQ  candidate_match_encodeSnappyBetterBlockAsm10B
16884  	CMPL R11, DI
16885  	JEQ  candidateS_match_encodeSnappyBetterBlockAsm10B
16886  	MOVL 20(SP), DX // nothing matched: s = saved next position
16887  	JMP  search_loop_encodeSnappyBetterBlockAsm10B
16888  // Only the 4-byte-table candidate matched. Probe the 6-byte table at s+1 and prefer that candidate if its first 4 bytes match.
16889  candidateS_match_encodeSnappyBetterBlockAsm10B:
16890  	SHRQ  $0x08, DI // DI = source bytes starting at s+1
16891  	MOVQ  DI, R10
16892  	SHLQ  $0x10, R10
16893  	IMULQ R9, R10
16894  	SHRQ  $0x34, R10
16895  	MOVL  (AX)(R10*4), SI
16896  	INCL  DX // tentatively s = s+1
16897  	MOVL  DX, (AX)(R10*4)
16898  	CMPL  (BX)(SI*1), DI
16899  	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B
16900  	DECL  DX // no luck: back to s and the 4-byte-table candidate
16901  	MOVL  R8, SI
16902  // Match found: DX = s, SI = candidate. Extend it backwards, but not before nextEmit or src offset 0.
16903  candidate_match_encodeSnappyBetterBlockAsm10B:
16904  	MOVL  12(SP), DI // DI = nextEmit, lower bound for s
16905  	TESTL SI, SI
16906  	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm10B // candidate already at offset 0
16907  
16908  match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
16909  	CMPL DX, DI
16910  	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
16911  	MOVB -1(BX)(SI*1), R8
16912  	MOVB -1(BX)(DX*1), R9
16913  	CMPB R8, R9
16914  	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
16915  	LEAL -1(DX), DX
16916  	DECL SI
16917  	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm10B
16918  	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm10B
16919  // Return 0 unless dst pointer + pending literal bytes + 3 stays below the dst limit.
16920  match_extend_back_end_encodeSnappyBetterBlockAsm10B:
16921  	MOVL DX, DI
16922  	SUBL 12(SP), DI // DI = number of pending literal bytes
16923  	LEAQ 3(CX)(DI*1), DI
16924  	CMPQ DI, (SP)
16925  	JB   match_dst_size_check_encodeSnappyBetterBlockAsm10B
16926  	MOVQ $0x00000000, ret+56(FP)
16927  	RET
16928  
16929  match_dst_size_check_encodeSnappyBetterBlockAsm10B:
16930  	MOVL DX, DI // DI = base, the src offset where the match starts
16931  	ADDL $0x04, DX // skip the 4 bytes already compared
16932  	ADDL $0x04, SI
16933  	MOVQ src_len+32(FP), R8
16934  	SUBL DX, R8 // R8 = bytes left in src from s+4
16935  	LEAQ (BX)(DX*1), R9 // R9 = &src[s+4]
16936  	LEAQ (BX)(SI*1), R10 // R10 = &src[candidate+4]
16937  
16938  	// matchLen: R12 = number of equal bytes at R9 and R10, limited by R8
16939  	XORL R12, R12
16940  
16941  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B:
16942  	CMPL R8, $0x10
16943  	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B
16944  	MOVQ (R9)(R12*1), R11
16945  	MOVQ 8(R9)(R12*1), R13
16946  	XORQ (R10)(R12*1), R11 // non-zero bits mark differing bytes
16947  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
16948  	XORQ 8(R10)(R12*1), R13
16949  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B
16950  	LEAL -16(R8), R8
16951  	LEAL 16(R12), R12
16952  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B
16953  // Difference is in the second 8 bytes of the 16-byte step.
16954  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B:
16955  #ifdef GOAMD64_v3
16956  	TZCNTQ R13, R13
16957  
16958  #else
16959  	BSFQ R13, R13
16960  
16961  #endif
16962  	SARQ $0x03, R13 // bit index -> byte index of the first difference
16963  	LEAL 8(R12)(R13*1), R12
16964  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
16965  
16966  matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B:
16967  	CMPL R8, $0x08
16968  	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
16969  	MOVQ (R9)(R12*1), R11
16970  	XORQ (R10)(R12*1), R11
16971  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
16972  	LEAL -8(R8), R8
16973  	LEAL 8(R12), R12
16974  	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
16975  // Difference is in the 8 bytes at R12.
16976  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B:
16977  #ifdef GOAMD64_v3
16978  	TZCNTQ R11, R11
16979  
16980  #else
16981  	BSFQ R11, R11
16982  
16983  #endif
16984  	SARQ $0x03, R11 // bit index -> byte index of the first difference
16985  	LEAL (R12)(R11*1), R12
16986  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
16987  
16988  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B:
16989  	CMPL R8, $0x04
16990  	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
16991  	MOVL (R9)(R12*1), R11
16992  	CMPL (R10)(R12*1), R11
16993  	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
16994  	LEAL -4(R8), R8
16995  	LEAL 4(R12), R12
16996  
16997  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B:
16998  	CMPL R8, $0x01
16999  	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B // exactly one byte left
17000  	JB   match_nolit_end_encodeSnappyBetterBlockAsm10B // nothing left
17001  	MOVW (R9)(R12*1), R11
17002  	CMPW (R10)(R12*1), R11
17003  	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
17004  	LEAL 2(R12), R12
17005  	SUBL $0x02, R8
17006  	JZ   match_nolit_end_encodeSnappyBetterBlockAsm10B
17007  
17008  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B:
17009  	MOVB (R9)(R12*1), R11
17010  	CMPB (R10)(R12*1), R11
17011  	JNE  match_nolit_end_encodeSnappyBetterBlockAsm10B
17012  	LEAL 1(R12), R12
17013  // Here DX = s+4, SI = candidate+4, DI = base, R12 = matched bytes beyond the first 4.
17014  match_nolit_end_encodeSnappyBetterBlockAsm10B:
17015  	MOVL DX, R8
17016  	SUBL SI, R8 // R8 = match offset (distance back to the candidate)
17017  
17018  	// Check if repeat
17019  	MOVL R8, 16(SP) // remember the offset
17020  	MOVL 12(SP), SI
17021  	CMPL SI, DI
17022  	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B // nextEmit == base: no literals to emit
17023  	MOVL DI, R9
17024  	MOVL DI, 12(SP)
17025  	LEAQ (BX)(SI*1), R10 // R10 = first pending literal byte
17026  	SUBL SI, R9 // R9 = literal length
17027  	LEAL -1(R9), SI // SI = length - 1, the value stored in the literal header
17028  	CMPL SI, $0x3c
17029  	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm10B
17030  	CMPL SI, $0x00000100
17031  	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
17032  	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm10B // same flags as the JB above, so never taken; execution falls through
17033  // Header: tag byte 0xf4 followed by length-1 as 16 bits.
17034  three_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
17035  	MOVB $0xf4, (CX)
17036  	MOVW SI, 1(CX)
17037  	ADDQ $0x03, CX
17038  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
17039  // Header: tag byte 0xf0 followed by length-1 as 8 bits.
17040  two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
17041  	MOVB $0xf0, (CX)
17042  	MOVB SI, 1(CX)
17043  	ADDQ $0x02, CX
17044  	CMPL SI, $0x40
17045  	JB   memmove_match_emit_encodeSnappyBetterBlockAsm10B // at most 64 bytes: short copy
17046  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
17047  // Header: single byte holding (length-1) << 2.
17048  one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
17049  	SHLB $0x02, SI
17050  	MOVB SI, (CX)
17051  	ADDQ $0x01, CX
17052  
17053  memmove_match_emit_encodeSnappyBetterBlockAsm10B:
17054  	LEAQ (CX)(R9*1), SI // SI = dst pointer after the literals
17055  
17056  	// genMemMoveShort: copy R9 (<= 64) bytes from R10 to CX with overlapping head/tail moves
17057  	CMPQ R9, $0x08
17058  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
17059  	CMPQ R9, $0x10
17060  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
17061  	CMPQ R9, $0x20
17062  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
17063  	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
17064  // Lengths up to 8 always move a full 8 bytes; CX is advanced by the real length afterwards.
17065  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
17066  	MOVQ (R10), R11
17067  	MOVQ R11, (CX)
17068  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
17069  
17070  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
17071  	MOVQ (R10), R11
17072  	MOVQ -8(R10)(R9*1), R10
17073  	MOVQ R11, (CX)
17074  	MOVQ R10, -8(CX)(R9*1)
17075  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
17076  
17077  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
17078  	MOVOU (R10), X0
17079  	MOVOU -16(R10)(R9*1), X1
17080  	MOVOU X0, (CX)
17081  	MOVOU X1, -16(CX)(R9*1)
17082  	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
17083  
17084  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
17085  	MOVOU (R10), X0
17086  	MOVOU 16(R10), X1
17087  	MOVOU -32(R10)(R9*1), X2
17088  	MOVOU -16(R10)(R9*1), X3
17089  	MOVOU X0, (CX)
17090  	MOVOU X1, 16(CX)
17091  	MOVOU X2, -32(CX)(R9*1)
17092  	MOVOU X3, -16(CX)(R9*1)
17093  
17094  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
17095  	MOVQ SI, CX // CX = dst pointer after the literals
17096  	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
17097  
17098  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
17099  	LEAQ (CX)(R9*1), SI // SI = dst pointer after the literals
17100  
17101  	// genMemMoveLong: copy R9 bytes from R10 to CX
17102  	MOVOU (R10), X0 // X0..X3 = first and last 32 source bytes, stored after the loop
17103  	MOVOU 16(R10), X1
17104  	MOVOU -32(R10)(R9*1), X2
17105  	MOVOU -16(R10)(R9*1), X3
17106  	MOVQ  R9, R13
17107  	SHRQ  $0x05, R13 // R13 = length / 32
17108  	MOVQ  CX, R11
17109  	ANDL  $0x0000001f, R11 // R11 = CX mod 32
17110  	MOVQ  $0x00000040, R14
17111  	SUBQ  R11, R14 // R14 = 64 - (CX mod 32), so CX+R14-32 is 32-byte aligned
17112  	DECQ  R13
17113  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF alone, so JA sees CF from the SUBQ above plus ZF from DECQ - confirm intended
17114  	LEAQ  -32(R10)(R14*1), R11
17115  	LEAQ  -32(CX)(R14*1), R15
17116  
17117  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
17118  	MOVOU (R11), X4
17119  	MOVOU 16(R11), X5
17120  	MOVOA X4, (R15)
17121  	MOVOA X5, 16(R15)
17122  	ADDQ  $0x20, R15
17123  	ADDQ  $0x20, R11
17124  	ADDQ  $0x20, R14
17125  	DECQ  R13
17126  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
17127  // Copy 32 bytes per iteration to aligned dst addresses while length >= R14.
17128  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
17129  	MOVOU -32(R10)(R14*1), X4
17130  	MOVOU -16(R10)(R14*1), X5
17131  	MOVOA X4, -32(CX)(R14*1)
17132  	MOVOA X5, -16(CX)(R14*1)
17133  	ADDQ  $0x20, R14
17134  	CMPQ  R9, R14
17135  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
17136  	MOVOU X0, (CX) // finally store the saved head and tail (unaligned)
17137  	MOVOU X1, 16(CX)
17138  	MOVOU X2, -32(CX)(R9*1)
17139  	MOVOU X3, -16(CX)(R9*1)
17140  	MOVQ  SI, CX // CX = dst pointer after the literals
17141  
17142  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
17143  	ADDL R12, DX // s = end of the match
17144  	ADDL $0x04, R12 // R12 = total match length
17145  	MOVL DX, 12(SP) // nextEmit = s
17146  
17147  	// emitCopy: R12 = length, R8 = offset. While length > 64, emit 3-byte copies of 60 bytes each.
17148  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
17149  	CMPL R12, $0x40
17150  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
17151  	MOVB $0xee, (CX) // tag 0xee, then the 16-bit offset
17152  	MOVW R8, 1(CX)
17153  	LEAL -60(R12), R12
17154  	ADDQ $0x03, CX
17155  	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
17156  // Length <= 64: 2-byte form when length < 12 and offset < 2048, otherwise 3-byte form.
17157  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
17158  	MOVL R12, SI
17159  	SHLL $0x02, SI // SI = length * 4
17160  	CMPL R12, $0x0c
17161  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
17162  	CMPL R8, $0x00000800
17163  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
17164  	LEAL -15(SI), SI // SI = (length-4)<<2 | 1
17165  	MOVB R8, 1(CX) // low 8 bits of the offset
17166  	SHRL $0x08, R8
17167  	SHLL $0x05, R8 // remaining offset bits move to bit 5 and up of the tag
17168  	ORL  R8, SI
17169  	MOVB SI, (CX)
17170  	ADDQ $0x02, CX
17171  	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
17172  
17173  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
17174  	LEAL -2(SI), SI // SI = (length-1)<<2 | 2
17175  	MOVB SI, (CX)
17176  	MOVW R8, 1(CX) // 16-bit offset
17177  	ADDQ $0x03, CX
17178  
17179  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
17180  	CMPL DX, 8(SP)
17181  	JAE  emit_remainder_encodeSnappyBetterBlockAsm10B // s reached the search limit
17182  	CMPQ CX, (SP)
17183  	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
17184  	MOVQ $0x00000000, ret+56(FP) // dst limit reached: return 0
17185  	RET
17186  // Index positions inside the match: base+1 and s-2 in the 6-byte table, base+2 and s-1 in the 4-byte table.
17187  match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
17188  	MOVQ  $0x0000cf1bbcdcbf9b, SI // multiplier for the 6-byte hash
17189  	MOVQ  $0x9e3779b1, R8 // multiplier for the 4-byte hash
17190  	LEAQ  1(DI), DI // DI = base + 1
17191  	LEAQ  -2(DX), R9 // R9 = s - 2
17192  	MOVQ  (BX)(DI*1), R10
17193  	MOVQ  1(BX)(DI*1), R11
17194  	MOVQ  (BX)(R9*1), R12
17195  	MOVQ  1(BX)(R9*1), R13
17196  	SHLQ  $0x10, R10
17197  	IMULQ SI, R10
17198  	SHRQ  $0x34, R10
17199  	SHLQ  $0x20, R11
17200  	IMULQ R8, R11
17201  	SHRQ  $0x36, R11
17202  	SHLQ  $0x10, R12
17203  	IMULQ SI, R12
17204  	SHRQ  $0x34, R12
17205  	SHLQ  $0x20, R13
17206  	IMULQ R8, R13
17207  	SHRQ  $0x36, R13
17208  	LEAQ  1(DI), R8
17209  	LEAQ  1(R9), R14
17210  	MOVL  DI, (AX)(R10*4)
17211  	MOVL  R9, (AX)(R12*4)
17212  	MOVL  R8, 16384(AX)(R11*4)
17213  	MOVL  R14, 16384(AX)(R13*4)
17214  	LEAQ  1(R9)(DI*1), R8
17215  	SHRQ  $0x01, R8 // R8 = (base + s) / 2
17216  	ADDQ  $0x01, DI // DI = base + 2
17217  	SUBQ  $0x01, R9 // R9 = s - 3, loop bound
17218  // Add every second position from DI and from R8 to the 6-byte table until R8 reaches R9.
17219  index_loop_encodeSnappyBetterBlockAsm10B:
17220  	CMPQ  R8, R9
17221  	JAE   search_loop_encodeSnappyBetterBlockAsm10B
17222  	MOVQ  (BX)(DI*1), R10
17223  	MOVQ  (BX)(R8*1), R11
17224  	SHLQ  $0x10, R10
17225  	IMULQ SI, R10
17226  	SHRQ  $0x34, R10
17227  	SHLQ  $0x10, R11
17228  	IMULQ SI, R11
17229  	SHRQ  $0x34, R11
17230  	MOVL  DI, (AX)(R10*4)
17231  	MOVL  R8, (AX)(R11*4)
17232  	ADDQ  $0x02, DI
17233  	ADDQ  $0x02, R8
17234  	JMP   index_loop_encodeSnappyBetterBlockAsm10B
17235  // Emit src[nextEmit:] as one literal, after the same dst limit check as for matches.
17236  emit_remainder_encodeSnappyBetterBlockAsm10B:
17237  	MOVQ src_len+32(FP), AX
17238  	SUBL 12(SP), AX // AX = remaining literal bytes
17239  	LEAQ 3(CX)(AX*1), AX
17240  	CMPQ AX, (SP)
17241  	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm10B
17242  	MOVQ $0x00000000, ret+56(FP)
17243  	RET
17244  
17245  emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
17246  	MOVQ src_len+32(FP), AX
17247  	MOVL 12(SP), DX
17248  	CMPL DX, AX
17249  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B // nothing left to emit
17250  	MOVL AX, SI
17251  	MOVL AX, 12(SP)
17252  	LEAQ (BX)(DX*1), AX // AX = first remaining source byte (AX no longer holds tmp)
17253  	SUBL DX, SI // SI = literal length
17254  	LEAL -1(SI), DX // DX = length - 1, the value stored in the literal header
17255  	CMPL DX, $0x3c
17256  	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
17257  	CMPL DX, $0x00000100
17258  	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
17259  	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B // same flags as the JB above, so never taken; execution falls through
17260  // Header: tag byte 0xf4 followed by length-1 as 16 bits.
17261  three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
17262  	MOVB $0xf4, (CX)
17263  	MOVW DX, 1(CX)
17264  	ADDQ $0x03, CX
17265  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
17266  // Header: tag byte 0xf0 followed by length-1 as 8 bits.
17267  two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
17268  	MOVB $0xf0, (CX)
17269  	MOVB DL, 1(CX)
17270  	ADDQ $0x02, CX
17271  	CMPL DX, $0x40
17272  	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm10B // at most 64 bytes: short copy
17273  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
17274  // Header: single byte holding (length-1) << 2.
17275  one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
17276  	SHLB $0x02, DL
17277  	MOVB DL, (CX)
17278  	ADDQ $0x01, CX
17279  
17280  memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
17281  	LEAQ (CX)(SI*1), DX // DX = dst pointer after the literals
17282  	MOVL SI, BX // BX = length (BX no longer holds the src base)
17283  
17284  	// genMemMoveShort: copy BX (1..64) bytes from AX to CX without touching bytes outside the range
17285  	CMPQ BX, $0x03
17286  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2
17287  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3
17288  	CMPQ BX, $0x08
17289  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7
17290  	CMPQ BX, $0x10
17291  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
17292  	CMPQ BX, $0x20
17293  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
17294  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
17295  
17296  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2:
17297  	MOVB (AX), SI
17298  	MOVB -1(AX)(BX*1), AL
17299  	MOVB SI, (CX)
17300  	MOVB AL, -1(CX)(BX*1)
17301  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
17302  
17303  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3:
17304  	MOVW (AX), SI
17305  	MOVB 2(AX), AL
17306  	MOVW SI, (CX)
17307  	MOVB AL, 2(CX)
17308  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
17309  
17310  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7:
17311  	MOVL (AX), SI
17312  	MOVL -4(AX)(BX*1), AX
17313  	MOVL SI, (CX)
17314  	MOVL AX, -4(CX)(BX*1)
17315  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
17316  
17317  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
17318  	MOVQ (AX), SI
17319  	MOVQ -8(AX)(BX*1), AX
17320  	MOVQ SI, (CX)
17321  	MOVQ AX, -8(CX)(BX*1)
17322  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
17323  
17324  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
17325  	MOVOU (AX), X0
17326  	MOVOU -16(AX)(BX*1), X1
17327  	MOVOU X0, (CX)
17328  	MOVOU X1, -16(CX)(BX*1)
17329  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
17330  
17331  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
17332  	MOVOU (AX), X0
17333  	MOVOU 16(AX), X1
17334  	MOVOU -32(AX)(BX*1), X2
17335  	MOVOU -16(AX)(BX*1), X3
17336  	MOVOU X0, (CX)
17337  	MOVOU X1, 16(CX)
17338  	MOVOU X2, -32(CX)(BX*1)
17339  	MOVOU X3, -16(CX)(BX*1)
17340  
17341  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
17342  	MOVQ DX, CX // CX = dst pointer after the literals
17343  	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
17344  
17345  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
17346  	LEAQ (CX)(SI*1), DX // DX = dst pointer after the literals
17347  	MOVL SI, BX // BX = length (BX no longer holds the src base)
17348  
17349  	// genMemMoveLong: copy BX bytes from AX to CX
17350  	MOVOU (AX), X0 // X0..X3 = first and last 32 source bytes, stored after the loop
17351  	MOVOU 16(AX), X1
17352  	MOVOU -32(AX)(BX*1), X2
17353  	MOVOU -16(AX)(BX*1), X3
17354  	MOVQ  BX, DI
17355  	SHRQ  $0x05, DI // DI = length / 32
17356  	MOVQ  CX, SI
17357  	ANDL  $0x0000001f, SI // SI = CX mod 32
17358  	MOVQ  $0x00000040, R8
17359  	SUBQ  SI, R8 // R8 = 64 - (CX mod 32), so CX+R8-32 is 32-byte aligned
17360  	DECQ  DI
17361  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF alone, so JA sees CF from the SUBQ above plus ZF from DECQ - confirm intended
17362  	LEAQ  -32(AX)(R8*1), SI
17363  	LEAQ  -32(CX)(R8*1), R9
17364  
17365  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
17366  	MOVOU (SI), X4
17367  	MOVOU 16(SI), X5
17368  	MOVOA X4, (R9)
17369  	MOVOA X5, 16(R9)
17370  	ADDQ  $0x20, R9
17371  	ADDQ  $0x20, SI
17372  	ADDQ  $0x20, R8
17373  	DECQ  DI
17374  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
17375  // Copy 32 bytes per iteration to aligned dst addresses while length >= R8.
17376  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
17377  	MOVOU -32(AX)(R8*1), X4
17378  	MOVOU -16(AX)(R8*1), X5
17379  	MOVOA X4, -32(CX)(R8*1)
17380  	MOVOA X5, -16(CX)(R8*1)
17381  	ADDQ  $0x20, R8
17382  	CMPQ  BX, R8
17383  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
17384  	MOVOU X0, (CX) // finally store the saved head and tail (unaligned)
17385  	MOVOU X1, 16(CX)
17386  	MOVOU X2, -32(CX)(BX*1)
17387  	MOVOU X3, -16(CX)(BX*1)
17388  	MOVQ  DX, CX // CX = dst pointer after the literals
17389  // Return the number of bytes written: final dst pointer minus dst base.
17390  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
17391  	MOVQ dst_base+0(FP), AX
17392  	SUBQ AX, CX
17393  	MOVQ CX, ret+56(FP)
17394  	RET
17395  
17396  // func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
17397  // Requires: BMI, SSE2
17398  TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64
17399  	MOVQ tmp+48(FP), AX
17400  	MOVQ dst_base+0(FP), CX
17401  	MOVQ $0x00000028, DX
17402  	MOVQ AX, BX
17403  	PXOR X0, X0
17404  
17405  zero_loop_encodeSnappyBetterBlockAsm8B:
17406  	MOVOU X0, (BX)
17407  	MOVOU X0, 16(BX)
17408  	MOVOU X0, 32(BX)
17409  	MOVOU X0, 48(BX)
17410  	MOVOU X0, 64(BX)
17411  	MOVOU X0, 80(BX)
17412  	MOVOU X0, 96(BX)
17413  	MOVOU X0, 112(BX)
17414  	ADDQ  $0x80, BX
17415  	DECQ  DX
17416  	JNZ   zero_loop_encodeSnappyBetterBlockAsm8B
17417  	MOVL  $0x00000000, 12(SP)
17418  	MOVQ  src_len+32(FP), DX
17419  	LEAQ  -9(DX), BX
17420  	LEAQ  -8(DX), SI
17421  	MOVL  SI, 8(SP)
17422  	SHRQ  $0x05, DX
17423  	SUBL  DX, BX
17424  	LEAQ  (CX)(BX*1), BX
17425  	MOVQ  BX, (SP)
17426  	MOVL  $0x00000001, DX
17427  	MOVL  $0x00000000, 16(SP)
17428  	MOVQ  src_base+24(FP), BX
17429  
17430  search_loop_encodeSnappyBetterBlockAsm8B:
17431  	MOVL  DX, SI
17432  	SUBL  12(SP), SI
17433  	SHRL  $0x04, SI
17434  	LEAL  1(DX)(SI*1), SI
17435  	CMPL  SI, 8(SP)
17436  	JAE   emit_remainder_encodeSnappyBetterBlockAsm8B
17437  	MOVQ  (BX)(DX*1), DI
17438  	MOVL  SI, 20(SP)
17439  	MOVQ  $0x0000cf1bbcdcbf9b, R9
17440  	MOVQ  $0x9e3779b1, SI
17441  	MOVQ  DI, R10
17442  	MOVQ  DI, R11
17443  	SHLQ  $0x10, R10
17444  	IMULQ R9, R10
17445  	SHRQ  $0x36, R10
17446  	SHLQ  $0x20, R11
17447  	IMULQ SI, R11
17448  	SHRQ  $0x38, R11
17449  	MOVL  (AX)(R10*4), SI
17450  	MOVL  4096(AX)(R11*4), R8
17451  	MOVL  DX, (AX)(R10*4)
17452  	MOVL  DX, 4096(AX)(R11*4)
17453  	MOVQ  (BX)(SI*1), R10
17454  	MOVQ  (BX)(R8*1), R11
17455  	CMPQ  R10, DI
17456  	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
17457  	CMPQ  R11, DI
17458  	JNE   no_short_found_encodeSnappyBetterBlockAsm8B
17459  	MOVL  R8, SI
17460  	JMP   candidate_match_encodeSnappyBetterBlockAsm8B
17461  
17462  no_short_found_encodeSnappyBetterBlockAsm8B:
17463  	CMPL R10, DI
17464  	JEQ  candidate_match_encodeSnappyBetterBlockAsm8B
17465  	CMPL R11, DI
17466  	JEQ  candidateS_match_encodeSnappyBetterBlockAsm8B
17467  	MOVL 20(SP), DX
17468  	JMP  search_loop_encodeSnappyBetterBlockAsm8B
17469  
17470  candidateS_match_encodeSnappyBetterBlockAsm8B:
17471  	SHRQ  $0x08, DI
17472  	MOVQ  DI, R10
17473  	SHLQ  $0x10, R10
17474  	IMULQ R9, R10
17475  	SHRQ  $0x36, R10
17476  	MOVL  (AX)(R10*4), SI
17477  	INCL  DX
17478  	MOVL  DX, (AX)(R10*4)
17479  	CMPL  (BX)(SI*1), DI
17480  	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
17481  	DECL  DX
17482  	MOVL  R8, SI
17483  
17484  candidate_match_encodeSnappyBetterBlockAsm8B:
17485  	MOVL  12(SP), DI
17486  	TESTL SI, SI
17487  	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm8B
17488  
17489  match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
17490  	CMPL DX, DI
17491  	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
17492  	MOVB -1(BX)(SI*1), R8
17493  	MOVB -1(BX)(DX*1), R9
17494  	CMPB R8, R9
17495  	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
17496  	LEAL -1(DX), DX
17497  	DECL SI
17498  	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm8B
17499  	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm8B
17500  
17501  match_extend_back_end_encodeSnappyBetterBlockAsm8B:
17502  	MOVL DX, DI
17503  	SUBL 12(SP), DI
17504  	LEAQ 3(CX)(DI*1), DI
17505  	CMPQ DI, (SP)
17506  	JB   match_dst_size_check_encodeSnappyBetterBlockAsm8B
17507  	MOVQ $0x00000000, ret+56(FP)
17508  	RET
17509  
17510  match_dst_size_check_encodeSnappyBetterBlockAsm8B:
17511  	MOVL DX, DI
17512  	ADDL $0x04, DX
17513  	ADDL $0x04, SI
17514  	MOVQ src_len+32(FP), R8
17515  	SUBL DX, R8
17516  	LEAQ (BX)(DX*1), R9
17517  	LEAQ (BX)(SI*1), R10
17518  
17519  	// matchLen
17520  	XORL R12, R12
17521  
17522  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B:
17523  	CMPL R8, $0x10
17524  	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B
17525  	MOVQ (R9)(R12*1), R11
17526  	MOVQ 8(R9)(R12*1), R13
17527  	XORQ (R10)(R12*1), R11
17528  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
17529  	XORQ 8(R10)(R12*1), R13
17530  	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B
17531  	LEAL -16(R8), R8
17532  	LEAL 16(R12), R12
17533  	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B
17534  
17535  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B:
17536  #ifdef GOAMD64_v3
17537  	TZCNTQ R13, R13
17538  
17539  #else
17540  	BSFQ R13, R13
17541  
17542  #endif
17543  	SARQ $0x03, R13
17544  	LEAL 8(R12)(R13*1), R12
17545  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
17546  
17547  matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B:
17548  	CMPL R8, $0x08
17549  	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
17550  	MOVQ (R9)(R12*1), R11
17551  	XORQ (R10)(R12*1), R11
17552  	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
17553  	LEAL -8(R8), R8
17554  	LEAL 8(R12), R12
17555  	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
17556  
17557  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B:
17558  #ifdef GOAMD64_v3
17559  	TZCNTQ R11, R11
17560  
17561  #else
17562  	BSFQ R11, R11
17563  
17564  #endif
17565  	SARQ $0x03, R11
17566  	LEAL (R12)(R11*1), R12
17567  	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
17568  
17569  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
17570  	CMPL R8, $0x04
17571  	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
17572  	MOVL (R9)(R12*1), R11
17573  	CMPL (R10)(R12*1), R11
17574  	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
17575  	LEAL -4(R8), R8
17576  	LEAL 4(R12), R12
17577  
17578  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
17579  	CMPL R8, $0x01
17580  	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
17581  	JB   match_nolit_end_encodeSnappyBetterBlockAsm8B
17582  	MOVW (R9)(R12*1), R11
17583  	CMPW (R10)(R12*1), R11
17584  	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
17585  	LEAL 2(R12), R12
17586  	SUBL $0x02, R8
17587  	JZ   match_nolit_end_encodeSnappyBetterBlockAsm8B
17588  
17589  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
17590  	MOVB (R9)(R12*1), R11
17591  	CMPB (R10)(R12*1), R11
17592  	JNE  match_nolit_end_encodeSnappyBetterBlockAsm8B
17593  	LEAL 1(R12), R12
17594  
17595  match_nolit_end_encodeSnappyBetterBlockAsm8B:
17596  	MOVL DX, R8
17597  	SUBL SI, R8
17598  
17599  	// Check if repeat
17600  	MOVL R8, 16(SP)
17601  	MOVL 12(SP), SI
17602  	CMPL SI, DI
17603  	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
17604  	MOVL DI, R9
17605  	MOVL DI, 12(SP)
17606  	LEAQ (BX)(SI*1), R10
17607  	SUBL SI, R9
17608  	LEAL -1(R9), SI
17609  	CMPL SI, $0x3c
17610  	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm8B
17611  	CMPL SI, $0x00000100
17612  	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
17613  	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm8B
17614  
17615  three_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
17616  	MOVB $0xf4, (CX)
17617  	MOVW SI, 1(CX)
17618  	ADDQ $0x03, CX
17619  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
17620  
17621  two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
17622  	MOVB $0xf0, (CX)
17623  	MOVB SI, 1(CX)
17624  	ADDQ $0x02, CX
17625  	CMPL SI, $0x40
17626  	JB   memmove_match_emit_encodeSnappyBetterBlockAsm8B
17627  	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
17628  
17629  one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
17630  	SHLB $0x02, SI
17631  	MOVB SI, (CX)
17632  	ADDQ $0x01, CX
17633  
17634  memmove_match_emit_encodeSnappyBetterBlockAsm8B:
17635  	LEAQ (CX)(R9*1), SI
17636  
17637  	// genMemMoveShort
17638  	CMPQ R9, $0x08
17639  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
17640  	CMPQ R9, $0x10
17641  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
17642  	CMPQ R9, $0x20
17643  	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
17644  	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
17645  
17646  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
17647  	MOVQ (R10), R11
17648  	MOVQ R11, (CX)
17649  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
17650  
17651  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
17652  	MOVQ (R10), R11
17653  	MOVQ -8(R10)(R9*1), R10
17654  	MOVQ R11, (CX)
17655  	MOVQ R10, -8(CX)(R9*1)
17656  	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
17657  
17658  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
17659  	MOVOU (R10), X0
17660  	MOVOU -16(R10)(R9*1), X1
17661  	MOVOU X0, (CX)
17662  	MOVOU X1, -16(CX)(R9*1)
17663  	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
17664  
17665  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
17666  	MOVOU (R10), X0
17667  	MOVOU 16(R10), X1
17668  	MOVOU -32(R10)(R9*1), X2
17669  	MOVOU -16(R10)(R9*1), X3
17670  	MOVOU X0, (CX)
17671  	MOVOU X1, 16(CX)
17672  	MOVOU X2, -32(CX)(R9*1)
17673  	MOVOU X3, -16(CX)(R9*1)
17674  
17675  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
17676  	MOVQ SI, CX
17677  	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
17678  
17679  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
17680  	LEAQ (CX)(R9*1), SI
17681  
17682  	// genMemMoveLong
17683  	MOVOU (R10), X0
17684  	MOVOU 16(R10), X1
17685  	MOVOU -32(R10)(R9*1), X2
17686  	MOVOU -16(R10)(R9*1), X3
17687  	MOVQ  R9, R13
17688  	SHRQ  $0x05, R13
17689  	MOVQ  CX, R11
17690  	ANDL  $0x0000001f, R11
17691  	MOVQ  $0x00000040, R14
17692  	SUBQ  R11, R14
17693  	DECQ  R13
17694  	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
17695  	LEAQ  -32(R10)(R14*1), R11
17696  	LEAQ  -32(CX)(R14*1), R15
17697  
17698  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
17699  	MOVOU (R11), X4
17700  	MOVOU 16(R11), X5
17701  	MOVOA X4, (R15)
17702  	MOVOA X5, 16(R15)
17703  	ADDQ  $0x20, R15
17704  	ADDQ  $0x20, R11
17705  	ADDQ  $0x20, R14
17706  	DECQ  R13
17707  	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
17708  
17709  emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
17710  	MOVOU -32(R10)(R14*1), X4
17711  	MOVOU -16(R10)(R14*1), X5
17712  	MOVOA X4, -32(CX)(R14*1)
17713  	MOVOA X5, -16(CX)(R14*1)
17714  	ADDQ  $0x20, R14
17715  	CMPQ  R9, R14
17716  	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
17717  	MOVOU X0, (CX)
17718  	MOVOU X1, 16(CX)
17719  	MOVOU X2, -32(CX)(R9*1)
17720  	MOVOU X3, -16(CX)(R9*1)
17721  	MOVQ  SI, CX
17722  
17723  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
17724  	ADDL R12, DX
17725  	ADDL $0x04, R12
17726  	MOVL DX, 12(SP)
17727  
17728  	// emitCopy
17729  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
17730  	CMPL R12, $0x40
17731  	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
17732  	MOVB $0xee, (CX)
17733  	MOVW R8, 1(CX)
17734  	LEAL -60(R12), R12
17735  	ADDQ $0x03, CX
17736  	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
17737  
17738  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
17739  	MOVL R12, SI
17740  	SHLL $0x02, SI
17741  	CMPL R12, $0x0c
17742  	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B
17743  	LEAL -15(SI), SI
17744  	MOVB R8, 1(CX)
17745  	SHRL $0x08, R8
17746  	SHLL $0x05, R8
17747  	ORL  R8, SI
17748  	MOVB SI, (CX)
17749  	ADDQ $0x02, CX
17750  	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
17751  
17752  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
17753  	LEAL -2(SI), SI
17754  	MOVB SI, (CX)
17755  	MOVW R8, 1(CX)
17756  	ADDQ $0x03, CX
17757  
17758  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
17759  	CMPL DX, 8(SP)
17760  	JAE  emit_remainder_encodeSnappyBetterBlockAsm8B
17761  	CMPQ CX, (SP)
17762  	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
17763  	MOVQ $0x00000000, ret+56(FP)
17764  	RET
17765  
17766  match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
17767  	MOVQ  $0x0000cf1bbcdcbf9b, SI
17768  	MOVQ  $0x9e3779b1, R8
17769  	LEAQ  1(DI), DI
17770  	LEAQ  -2(DX), R9
17771  	MOVQ  (BX)(DI*1), R10
17772  	MOVQ  1(BX)(DI*1), R11
17773  	MOVQ  (BX)(R9*1), R12
17774  	MOVQ  1(BX)(R9*1), R13
17775  	SHLQ  $0x10, R10
17776  	IMULQ SI, R10
17777  	SHRQ  $0x36, R10
17778  	SHLQ  $0x20, R11
17779  	IMULQ R8, R11
17780  	SHRQ  $0x38, R11
17781  	SHLQ  $0x10, R12
17782  	IMULQ SI, R12
17783  	SHRQ  $0x36, R12
17784  	SHLQ  $0x20, R13
17785  	IMULQ R8, R13
17786  	SHRQ  $0x38, R13
17787  	LEAQ  1(DI), R8
17788  	LEAQ  1(R9), R14
17789  	MOVL  DI, (AX)(R10*4)
17790  	MOVL  R9, (AX)(R12*4)
17791  	MOVL  R8, 4096(AX)(R11*4)
17792  	MOVL  R14, 4096(AX)(R13*4)
17793  	LEAQ  1(R9)(DI*1), R8
17794  	SHRQ  $0x01, R8
17795  	ADDQ  $0x01, DI
17796  	SUBQ  $0x01, R9
17797  
17798  index_loop_encodeSnappyBetterBlockAsm8B:
17799  	CMPQ  R8, R9
17800  	JAE   search_loop_encodeSnappyBetterBlockAsm8B
17801  	MOVQ  (BX)(DI*1), R10
17802  	MOVQ  (BX)(R8*1), R11
17803  	SHLQ  $0x10, R10
17804  	IMULQ SI, R10
17805  	SHRQ  $0x36, R10
17806  	SHLQ  $0x10, R11
17807  	IMULQ SI, R11
17808  	SHRQ  $0x36, R11
17809  	MOVL  DI, (AX)(R10*4)
17810  	MOVL  R8, (AX)(R11*4)
17811  	ADDQ  $0x02, DI
17812  	ADDQ  $0x02, R8
17813  	JMP   index_loop_encodeSnappyBetterBlockAsm8B
17814  
17815  emit_remainder_encodeSnappyBetterBlockAsm8B:
17816  	MOVQ src_len+32(FP), AX
17817  	SUBL 12(SP), AX
17818  	LEAQ 3(CX)(AX*1), AX
17819  	CMPQ AX, (SP)
17820  	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm8B
17821  	MOVQ $0x00000000, ret+56(FP)
17822  	RET
17823  
17824  emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
17825  	MOVQ src_len+32(FP), AX
17826  	MOVL 12(SP), DX
17827  	CMPL DX, AX
17828  	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
17829  	MOVL AX, SI
17830  	MOVL AX, 12(SP)
17831  	LEAQ (BX)(DX*1), AX
17832  	SUBL DX, SI
17833  	LEAL -1(SI), DX
17834  	CMPL DX, $0x3c
17835  	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
17836  	CMPL DX, $0x00000100
17837  	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
17838  	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
17839  
17840  three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
17841  	MOVB $0xf4, (CX)
17842  	MOVW DX, 1(CX)
17843  	ADDQ $0x03, CX
17844  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
17845  
17846  two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
17847  	MOVB $0xf0, (CX)
17848  	MOVB DL, 1(CX)
17849  	ADDQ $0x02, CX
17850  	CMPL DX, $0x40
17851  	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
17852  	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
17853  
17854  one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
17855  	SHLB $0x02, DL
17856  	MOVB DL, (CX)
17857  	ADDQ $0x01, CX
17858  
17859  memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
17860  	LEAQ (CX)(SI*1), DX
17861  	MOVL SI, BX
17862  
17863  	// genMemMoveShort
17864  	CMPQ BX, $0x03
17865  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2
17866  	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3
17867  	CMPQ BX, $0x08
17868  	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7
17869  	CMPQ BX, $0x10
17870  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
17871  	CMPQ BX, $0x20
17872  	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
17873  	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
17874  
17875  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
17876  	MOVB (AX), SI
17877  	MOVB -1(AX)(BX*1), AL
17878  	MOVB SI, (CX)
17879  	MOVB AL, -1(CX)(BX*1)
17880  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
17881  
17882  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
17883  	MOVW (AX), SI
17884  	MOVB 2(AX), AL
17885  	MOVW SI, (CX)
17886  	MOVB AL, 2(CX)
17887  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
17888  
17889  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
17890  	MOVL (AX), SI
17891  	MOVL -4(AX)(BX*1), AX
17892  	MOVL SI, (CX)
17893  	MOVL AX, -4(CX)(BX*1)
17894  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
17895  
17896  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
17897  	MOVQ (AX), SI
17898  	MOVQ -8(AX)(BX*1), AX
17899  	MOVQ SI, (CX)
17900  	MOVQ AX, -8(CX)(BX*1)
17901  	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
17902  
17903  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
17904  	MOVOU (AX), X0
17905  	MOVOU -16(AX)(BX*1), X1
17906  	MOVOU X0, (CX)
17907  	MOVOU X1, -16(CX)(BX*1)
17908  	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
17909  
17910  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
17911  	MOVOU (AX), X0
17912  	MOVOU 16(AX), X1
17913  	MOVOU -32(AX)(BX*1), X2
17914  	MOVOU -16(AX)(BX*1), X3
17915  	MOVOU X0, (CX)
17916  	MOVOU X1, 16(CX)
17917  	MOVOU X2, -32(CX)(BX*1)
17918  	MOVOU X3, -16(CX)(BX*1)
17919  
17920  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
17921  	MOVQ DX, CX
17922  	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
17923  
17924  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
17925  	LEAQ (CX)(SI*1), DX
17926  	MOVL SI, BX
17927  
17928  	// genMemMoveLong
17929  	MOVOU (AX), X0
17930  	MOVOU 16(AX), X1
17931  	MOVOU -32(AX)(BX*1), X2
17932  	MOVOU -16(AX)(BX*1), X3
17933  	MOVQ  BX, DI
17934  	SHRQ  $0x05, DI
17935  	MOVQ  CX, SI
17936  	ANDL  $0x0000001f, SI
17937  	MOVQ  $0x00000040, R8
17938  	SUBQ  SI, R8
17939  	DECQ  DI
17940  	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
17941  	LEAQ  -32(AX)(R8*1), SI
17942  	LEAQ  -32(CX)(R8*1), R9
17943  
17944  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
17945  	MOVOU (SI), X4
17946  	MOVOU 16(SI), X5
17947  	MOVOA X4, (R9)
17948  	MOVOA X5, 16(R9)
17949  	ADDQ  $0x20, R9
17950  	ADDQ  $0x20, SI
17951  	ADDQ  $0x20, R8
17952  	DECQ  DI
17953  	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
17954  
17955  emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
17956  	MOVOU -32(AX)(R8*1), X4
17957  	MOVOU -16(AX)(R8*1), X5
17958  	MOVOA X4, -32(CX)(R8*1)
17959  	MOVOA X5, -16(CX)(R8*1)
17960  	ADDQ  $0x20, R8
17961  	CMPQ  BX, R8
17962  	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
17963  	MOVOU X0, (CX)
17964  	MOVOU X1, 16(CX)
17965  	MOVOU X2, -32(CX)(BX*1)
17966  	MOVOU X3, -16(CX)(BX*1)
17967  	MOVQ  DX, CX
17968  
17969  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
17970  	MOVQ dst_base+0(FP), AX
17971  	SUBQ AX, CX
17972  	MOVQ CX, ret+56(FP)
17973  	RET
17974  
17975  // func calcBlockSize(src []byte, tmp *[32768]byte) int
17976  // Requires: BMI, SSE2
//
// Scans src for matches and adds up the number of bytes that each literal
// run and each copy would occupy, without writing any output. The total is
// returned; 0 is returned as soon as the running total would reach the
// limit kept in (SP).
//
// Register use, as set up below:
//   AX      base of tmp, indexed as a table of 32-bit src positions
//           (reused as scratch from emit_remainder onwards)
//   BX      base of src (scratch during the zeroing loop)
//   CX      running output size in bytes
//   DX      current position in src
//
// Stack slots:
//   (SP)    size limit: src_len - 9 - (src_len >> 5)
//   8(SP)   src_len - 8; searching stops once a position reaches it
//   12(SP)  position up to which src has already been accounted for
//   16(SP)  offset of the most recent match (starts at 1)
//   20(SP)  position to resume from when no candidate matches
17977  TEXT ·calcBlockSize(SB), $24-40
17978  	MOVQ tmp+24(FP), AX // AX = tmp, used below as a table of 32-bit src positions
17979  	XORQ CX, CX // CX = running output size, starts at 0
17980  	MOVQ $0x00000100, DX // 256 iterations x 128 bytes clears all 32768 bytes of tmp
17981  	MOVQ AX, BX
17982  	PXOR X0, X0
17983  
17984  zero_loop_calcBlockSize: // zero 128 bytes of tmp per iteration
17985  	MOVOU X0, (BX)
17986  	MOVOU X0, 16(BX)
17987  	MOVOU X0, 32(BX)
17988  	MOVOU X0, 48(BX)
17989  	MOVOU X0, 64(BX)
17990  	MOVOU X0, 80(BX)
17991  	MOVOU X0, 96(BX)
17992  	MOVOU X0, 112(BX)
17993  	ADDQ  $0x80, BX
17994  	DECQ  DX
17995  	JNZ   zero_loop_calcBlockSize
17996  	MOVL  $0x00000000, 12(SP) // nothing accounted for yet
17997  	MOVQ  src_len+8(FP), DX
17998  	LEAQ  -9(DX), BX
17999  	LEAQ  -8(DX), SI
18000  	MOVL  SI, 8(SP) // 8(SP) = src_len - 8
18001  	SHRQ  $0x05, DX
18002  	SUBL  DX, BX // BX = src_len - 9 - (src_len >> 5)
18003  	LEAQ  (CX)(BX*1), BX // CX is 0 here, so BX keeps its value
18004  	MOVQ  BX, (SP) // (SP) = size limit
18005  	MOVL  $0x00000001, DX // searching starts at position 1
18006  	MOVL  DX, 16(SP) // initial match offset = 1
18007  	MOVQ  src_base+0(FP), BX // BX = src base from here on
18008  
18009  search_loop_calcBlockSize:
18010  	MOVL  DX, SI
18011  	SUBL  12(SP), SI
18012  	SHRL  $0x05, SI // extra skip: 1 per 32 bytes since the last accounted position
18013  	LEAL  4(DX)(SI*1), SI // SI = DX + 4 + skip, the position to try next on a miss
18014  	CMPL  SI, 8(SP)
18015  	JAE   emit_remainder_calcBlockSize
18016  	MOVQ  (BX)(DX*1), DI // DI = 8 bytes of src at DX
18017  	MOVL  SI, 20(SP)
18018  	MOVQ  $0x0000cf1bbcdcbf9b, R9 // hash multiplier
18019  	MOVQ  DI, R10
18020  	MOVQ  DI, R11
18021  	SHRQ  $0x08, R11 // R11 = bytes starting at DX+1
18022  	SHLQ  $0x10, R10 // discard the top 2 bytes so 6 bytes are hashed
18023  	IMULQ R9, R10
18024  	SHRQ  $0x33, R10 // keep the top 13 bits: table index for DX
18025  	SHLQ  $0x10, R11
18026  	IMULQ R9, R11
18027  	SHRQ  $0x33, R11 // table index for DX+1
18028  	MOVL  (AX)(R10*4), SI // SI = candidate for DX
18029  	MOVL  (AX)(R11*4), R8 // R8 = candidate for DX+1
18030  	MOVL  DX, (AX)(R10*4)
18031  	LEAL  1(DX), R10
18032  	MOVL  R10, (AX)(R11*4)
18033  	MOVQ  DI, R10
18034  	SHRQ  $0x10, R10
18035  	SHLQ  $0x10, R10
18036  	IMULQ R9, R10
18037  	SHRQ  $0x33, R10 // R10 = table index for DX+2 (looked up later)
18038  	MOVL  DX, R9
18039  	SUBL  16(SP), R9
18040  	MOVL  1(BX)(R9*1), R11 // R11 = 4 bytes at DX + 1 - offset
18041  	MOVQ  DI, R9
18042  	SHRQ  $0x08, R9
18043  	CMPL  R9, R11 // do the 4 bytes at DX+1 repeat at the last offset?
18044  	JNE   no_repeat_found_calcBlockSize
18045  	LEAL  1(DX), DI // DI = start of the repeat match
18046  	MOVL  12(SP), SI
18047  	MOVL  DI, R8
18048  	SUBL  16(SP), R8 // R8 = earlier position that DI matches
18049  	JZ    repeat_extend_back_end_calcBlockSize
18050  
18051  repeat_extend_back_loop_calcBlockSize: // grow the match backwards one byte at a time
18052  	CMPL DI, SI
18053  	JBE  repeat_extend_back_end_calcBlockSize
18054  	MOVB -1(BX)(R8*1), R9
18055  	MOVB -1(BX)(DI*1), R10
18056  	CMPB R9, R10
18057  	JNE  repeat_extend_back_end_calcBlockSize
18058  	LEAL -1(DI), DI
18059  	DECL R8
18060  	JNZ  repeat_extend_back_loop_calcBlockSize
18061  
18062  repeat_extend_back_end_calcBlockSize:
18063  	MOVL DI, SI
18064  	SUBL 12(SP), SI // SI = length of the pending literal run
18065  	LEAQ 5(CX)(SI*1), SI // size so far + literals + 5
18066  	CMPQ SI, (SP)
18067  	JB   repeat_dst_size_check_calcBlockSize
18068  	MOVQ $0x00000000, ret+32(FP) // limit reached: return 0
18069  	RET
18070  
18071  repeat_dst_size_check_calcBlockSize:
18072  	MOVL 12(SP), SI
18073  	CMPL SI, DI
18074  	JEQ  emit_literal_done_repeat_emit_calcBlockSize
18075  	MOVL DI, R8
18076  	MOVL DI, 12(SP)
18077  	LEAQ (BX)(SI*1), R9 // R9 is recomputed before it is next read
18078  	SUBL SI, R8 // R8 = literal length
18079  	LEAL -1(R8), SI // header size is chosen from length-1
18080  	CMPL SI, $0x3c
18081  	JB   one_byte_repeat_emit_calcBlockSize
18082  	CMPL SI, $0x00000100
18083  	JB   two_bytes_repeat_emit_calcBlockSize
18084  	CMPL SI, $0x00010000
18085  	JB   three_bytes_repeat_emit_calcBlockSize
18086  	CMPL SI, $0x01000000
18087  	JB   four_bytes_repeat_emit_calcBlockSize
18088  	ADDQ $0x05, CX
18089  	JMP  memmove_long_repeat_emit_calcBlockSize
18090  
18091  four_bytes_repeat_emit_calcBlockSize:
18092  	ADDQ $0x04, CX
18093  	JMP  memmove_long_repeat_emit_calcBlockSize
18094  
18095  three_bytes_repeat_emit_calcBlockSize:
18096  	ADDQ $0x03, CX
18097  	JMP  memmove_long_repeat_emit_calcBlockSize
18098  
18099  two_bytes_repeat_emit_calcBlockSize:
18100  	ADDQ $0x02, CX
18101  	CMPL SI, $0x40 // both targets below add the same literal length
18102  	JB   memmove_repeat_emit_calcBlockSize
18103  	JMP  memmove_long_repeat_emit_calcBlockSize
18104  
18105  one_byte_repeat_emit_calcBlockSize:
18106  	ADDQ $0x01, CX
18107  
18108  memmove_repeat_emit_calcBlockSize:
18109  	LEAQ (CX)(R8*1), CX // count the literal bytes themselves
18110  	JMP  emit_literal_done_repeat_emit_calcBlockSize
18111  
18112  memmove_long_repeat_emit_calcBlockSize:
18113  	LEAQ (CX)(R8*1), CX // count the literal bytes themselves
18114  
18115  emit_literal_done_repeat_emit_calcBlockSize:
18116  	ADDL $0x05, DX // skip DX+1..DX+4, already compared above
18117  	MOVL DX, SI
18118  	SUBL 16(SP), SI
18119  	MOVQ src_len+8(FP), R8
18120  	SUBL DX, R8 // R8 = bytes left in src from DX
18121  	LEAQ (BX)(DX*1), R9 // R9 = pointer to current bytes
18122  	LEAQ (BX)(SI*1), SI // SI = pointer to bytes one offset back
18123  
18124  	// matchLen
18125  	XORL R11, R11 // R11 = number of matching bytes found
18126  
18127  matchlen_loopback_16_repeat_extend_calcBlockSize: // compare 16 bytes per pass
18128  	CMPL R8, $0x10
18129  	JB   matchlen_match8_repeat_extend_calcBlockSize
18130  	MOVQ (R9)(R11*1), R10
18131  	MOVQ 8(R9)(R11*1), R12
18132  	XORQ (SI)(R11*1), R10
18133  	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
18134  	XORQ 8(SI)(R11*1), R12
18135  	JNZ  matchlen_bsf_16repeat_extend_calcBlockSize
18136  	LEAL -16(R8), R8
18137  	LEAL 16(R11), R11
18138  	JMP  matchlen_loopback_16_repeat_extend_calcBlockSize
18139  
18140  matchlen_bsf_16repeat_extend_calcBlockSize: // difference is in the second 8 bytes
18141  #ifdef GOAMD64_v3
18142  	TZCNTQ R12, R12
18143  
18144  #else
18145  	BSFQ R12, R12
18146  
18147  #endif
18148  	SARQ $0x03, R12 // lowest differing bit index / 8 = matching bytes
18149  	LEAL 8(R11)(R12*1), R11
18150  	JMP  repeat_extend_forward_end_calcBlockSize
18151  
18152  matchlen_match8_repeat_extend_calcBlockSize:
18153  	CMPL R8, $0x08
18154  	JB   matchlen_match4_repeat_extend_calcBlockSize
18155  	MOVQ (R9)(R11*1), R10
18156  	XORQ (SI)(R11*1), R10
18157  	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
18158  	LEAL -8(R8), R8
18159  	LEAL 8(R11), R11
18160  	JMP  matchlen_match4_repeat_extend_calcBlockSize
18161  
18162  matchlen_bsf_8_repeat_extend_calcBlockSize: // difference is in the 8 bytes at R11
18163  #ifdef GOAMD64_v3
18164  	TZCNTQ R10, R10
18165  
18166  #else
18167  	BSFQ R10, R10
18168  
18169  #endif
18170  	SARQ $0x03, R10 // lowest differing bit index / 8 = matching bytes
18171  	LEAL (R11)(R10*1), R11
18172  	JMP  repeat_extend_forward_end_calcBlockSize
18173  
18174  matchlen_match4_repeat_extend_calcBlockSize:
18175  	CMPL R8, $0x04
18176  	JB   matchlen_match2_repeat_extend_calcBlockSize
18177  	MOVL (R9)(R11*1), R10
18178  	CMPL (SI)(R11*1), R10
18179  	JNE  matchlen_match2_repeat_extend_calcBlockSize
18180  	LEAL -4(R8), R8
18181  	LEAL 4(R11), R11
18182  
18183  matchlen_match2_repeat_extend_calcBlockSize:
18184  	CMPL R8, $0x01 // one compare feeds both branches below
18185  	JE   matchlen_match1_repeat_extend_calcBlockSize
18186  	JB   repeat_extend_forward_end_calcBlockSize
18187  	MOVW (R9)(R11*1), R10
18188  	CMPW (SI)(R11*1), R10
18189  	JNE  matchlen_match1_repeat_extend_calcBlockSize
18190  	LEAL 2(R11), R11
18191  	SUBL $0x02, R8
18192  	JZ   repeat_extend_forward_end_calcBlockSize
18193  
18194  matchlen_match1_repeat_extend_calcBlockSize:
18195  	MOVB (R9)(R11*1), R10
18196  	CMPB (SI)(R11*1), R10
18197  	JNE  repeat_extend_forward_end_calcBlockSize
18198  	LEAL 1(R11), R11
18199  
18200  repeat_extend_forward_end_calcBlockSize:
18201  	ADDL R11, DX // DX = first position after the match
18202  	MOVL DX, SI
18203  	SUBL DI, SI // SI = match length
18204  	MOVL 16(SP), DI // DI = match offset
18205  
18206  	// emitCopy
18207  	CMPL DI, $0x00010000 // offsets below 65536 take the two_byte_offset path
18208  	JB   two_byte_offset_repeat_as_copy_calcBlockSize
18209  
18210  four_bytes_loop_back_repeat_as_copy_calcBlockSize: // 5 bytes per 64 bytes of length
18211  	CMPL SI, $0x40
18212  	JBE  four_bytes_remain_repeat_as_copy_calcBlockSize
18213  	LEAL -64(SI), SI
18214  	ADDQ $0x05, CX
18215  	CMPL SI, $0x04
18216  	JB   four_bytes_remain_repeat_as_copy_calcBlockSize
18217  	JMP  four_bytes_loop_back_repeat_as_copy_calcBlockSize
18218  
18219  four_bytes_remain_repeat_as_copy_calcBlockSize:
18220  	TESTL SI, SI
18221  	JZ    repeat_end_emit_calcBlockSize
18222  	XORL  SI, SI // SI is not read again before search_loop overwrites it
18223  	ADDQ  $0x05, CX
18224  	JMP   repeat_end_emit_calcBlockSize
18225  
18226  two_byte_offset_repeat_as_copy_calcBlockSize: // 3 bytes per 60 bytes while length > 64
18227  	CMPL SI, $0x40
18228  	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSize
18229  	LEAL -60(SI), SI
18230  	ADDQ $0x03, CX
18231  	JMP  two_byte_offset_repeat_as_copy_calcBlockSize
18232  
18233  two_byte_offset_short_repeat_as_copy_calcBlockSize:
18234  	MOVL SI, R8
18235  	SHLL $0x02, R8 // R8 is not read before search_loop reloads it
18236  	CMPL SI, $0x0c
18237  	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
18238  	CMPL DI, $0x00000800
18239  	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
18240  	ADDQ $0x02, CX // length < 12 and offset < 2048: 2 bytes
18241  	JMP  repeat_end_emit_calcBlockSize
18242  
18243  emit_copy_three_repeat_as_copy_calcBlockSize:
18244  	ADDQ $0x03, CX
18245  
18246  repeat_end_emit_calcBlockSize:
18247  	MOVL DX, 12(SP) // everything before DX is now accounted for
18248  	JMP  search_loop_calcBlockSize
18249  
18250  no_repeat_found_calcBlockSize:
18251  	CMPL (BX)(SI*1), DI // candidate for DX vs the 4 bytes at DX
18252  	JEQ  candidate_match_calcBlockSize
18253  	SHRQ $0x08, DI // DI = bytes starting at DX+1
18254  	MOVL (AX)(R10*4), SI // SI = candidate for DX+2
18255  	LEAL 2(DX), R9
18256  	CMPL (BX)(R8*1), DI // candidate for DX+1 vs the 4 bytes at DX+1
18257  	JEQ  candidate2_match_calcBlockSize
18258  	MOVL R9, (AX)(R10*4) // record DX+2 in the table
18259  	SHRQ $0x08, DI // DI = bytes starting at DX+2
18260  	CMPL (BX)(SI*1), DI
18261  	JEQ  candidate3_match_calcBlockSize
18262  	MOVL 20(SP), DX // no match: continue from the saved next position
18263  	JMP  search_loop_calcBlockSize
18264  
18265  candidate3_match_calcBlockSize:
18266  	ADDL $0x02, DX // the match starts at DX+2
18267  	JMP  candidate_match_calcBlockSize
18268  
18269  candidate2_match_calcBlockSize:
18270  	MOVL R9, (AX)(R10*4) // record DX+2 in the table, as on the miss path
18271  	INCL DX // the match starts at DX+1
18272  	MOVL R8, SI
18273  
18274  candidate_match_calcBlockSize: // DX = match start, SI = candidate position
18275  	MOVL  12(SP), DI
18276  	TESTL SI, SI
18277  	JZ    match_extend_back_end_calcBlockSize
18278  
18279  match_extend_back_loop_calcBlockSize: // grow the match backwards one byte at a time
18280  	CMPL DX, DI
18281  	JBE  match_extend_back_end_calcBlockSize
18282  	MOVB -1(BX)(SI*1), R8
18283  	MOVB -1(BX)(DX*1), R9
18284  	CMPB R8, R9
18285  	JNE  match_extend_back_end_calcBlockSize
18286  	LEAL -1(DX), DX
18287  	DECL SI
18288  	JZ   match_extend_back_end_calcBlockSize
18289  	JMP  match_extend_back_loop_calcBlockSize
18290  
18291  match_extend_back_end_calcBlockSize:
18292  	MOVL DX, DI
18293  	SUBL 12(SP), DI // DI = length of the pending literal run
18294  	LEAQ 5(CX)(DI*1), DI // size so far + literals + 5
18295  	CMPQ DI, (SP)
18296  	JB   match_dst_size_check_calcBlockSize
18297  	MOVQ $0x00000000, ret+32(FP) // limit reached: return 0
18298  	RET
18299  
18300  match_dst_size_check_calcBlockSize:
18301  	MOVL DX, DI
18302  	MOVL 12(SP), R8
18303  	CMPL R8, DI
18304  	JEQ  emit_literal_done_match_emit_calcBlockSize
18305  	MOVL DI, R9
18306  	MOVL DI, 12(SP)
18307  	LEAQ (BX)(R8*1), DI // overwritten two instructions below without being read
18308  	SUBL R8, R9 // R9 = literal length
18309  	LEAL -1(R9), DI // header size is chosen from length-1
18310  	CMPL DI, $0x3c
18311  	JB   one_byte_match_emit_calcBlockSize
18312  	CMPL DI, $0x00000100
18313  	JB   two_bytes_match_emit_calcBlockSize
18314  	CMPL DI, $0x00010000
18315  	JB   three_bytes_match_emit_calcBlockSize
18316  	CMPL DI, $0x01000000
18317  	JB   four_bytes_match_emit_calcBlockSize
18318  	ADDQ $0x05, CX
18319  	JMP  memmove_long_match_emit_calcBlockSize
18320  
18321  four_bytes_match_emit_calcBlockSize:
18322  	ADDQ $0x04, CX
18323  	JMP  memmove_long_match_emit_calcBlockSize
18324  
18325  three_bytes_match_emit_calcBlockSize:
18326  	ADDQ $0x03, CX
18327  	JMP  memmove_long_match_emit_calcBlockSize
18328  
18329  two_bytes_match_emit_calcBlockSize:
18330  	ADDQ $0x02, CX
18331  	CMPL DI, $0x40 // both targets below add the same literal length
18332  	JB   memmove_match_emit_calcBlockSize
18333  	JMP  memmove_long_match_emit_calcBlockSize
18334  
18335  one_byte_match_emit_calcBlockSize:
18336  	ADDQ $0x01, CX
18337  
18338  memmove_match_emit_calcBlockSize:
18339  	LEAQ (CX)(R9*1), CX // count the literal bytes themselves
18340  	JMP  emit_literal_done_match_emit_calcBlockSize
18341  
18342  memmove_long_match_emit_calcBlockSize:
18343  	LEAQ (CX)(R9*1), CX // count the literal bytes themselves
18344  
18345  emit_literal_done_match_emit_calcBlockSize:
18346  match_nolit_loop_calcBlockSize: // DX = match start, SI = candidate position
18347  	MOVL DX, DI
18348  	SUBL SI, DI
18349  	MOVL DI, 16(SP) // remember this match's offset
18350  	ADDL $0x04, DX // the first 4 bytes are already known to match
18351  	ADDL $0x04, SI
18352  	MOVQ src_len+8(FP), DI
18353  	SUBL DX, DI // DI = bytes left in src from DX
18354  	LEAQ (BX)(DX*1), R8 // R8 = pointer to current bytes
18355  	LEAQ (BX)(SI*1), SI // SI = pointer to candidate bytes
18356  
18357  	// matchLen
18358  	XORL R10, R10 // R10 = number of further matching bytes
18359  
18360  matchlen_loopback_16_match_nolit_calcBlockSize: // compare 16 bytes per pass
18361  	CMPL DI, $0x10
18362  	JB   matchlen_match8_match_nolit_calcBlockSize
18363  	MOVQ (R8)(R10*1), R9
18364  	MOVQ 8(R8)(R10*1), R11
18365  	XORQ (SI)(R10*1), R9
18366  	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
18367  	XORQ 8(SI)(R10*1), R11
18368  	JNZ  matchlen_bsf_16match_nolit_calcBlockSize
18369  	LEAL -16(DI), DI
18370  	LEAL 16(R10), R10
18371  	JMP  matchlen_loopback_16_match_nolit_calcBlockSize
18372  
18373  matchlen_bsf_16match_nolit_calcBlockSize: // difference is in the second 8 bytes
18374  #ifdef GOAMD64_v3
18375  	TZCNTQ R11, R11
18376  
18377  #else
18378  	BSFQ R11, R11
18379  
18380  #endif
18381  	SARQ $0x03, R11 // lowest differing bit index / 8 = matching bytes
18382  	LEAL 8(R10)(R11*1), R10
18383  	JMP  match_nolit_end_calcBlockSize
18384  
18385  matchlen_match8_match_nolit_calcBlockSize:
18386  	CMPL DI, $0x08
18387  	JB   matchlen_match4_match_nolit_calcBlockSize
18388  	MOVQ (R8)(R10*1), R9
18389  	XORQ (SI)(R10*1), R9
18390  	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
18391  	LEAL -8(DI), DI
18392  	LEAL 8(R10), R10
18393  	JMP  matchlen_match4_match_nolit_calcBlockSize
18394  
18395  matchlen_bsf_8_match_nolit_calcBlockSize: // difference is in the 8 bytes at R10
18396  #ifdef GOAMD64_v3
18397  	TZCNTQ R9, R9
18398  
18399  #else
18400  	BSFQ R9, R9
18401  
18402  #endif
18403  	SARQ $0x03, R9 // lowest differing bit index / 8 = matching bytes
18404  	LEAL (R10)(R9*1), R10
18405  	JMP  match_nolit_end_calcBlockSize
18406  
18407  matchlen_match4_match_nolit_calcBlockSize:
18408  	CMPL DI, $0x04
18409  	JB   matchlen_match2_match_nolit_calcBlockSize
18410  	MOVL (R8)(R10*1), R9
18411  	CMPL (SI)(R10*1), R9
18412  	JNE  matchlen_match2_match_nolit_calcBlockSize
18413  	LEAL -4(DI), DI
18414  	LEAL 4(R10), R10
18415  
18416  matchlen_match2_match_nolit_calcBlockSize:
18417  	CMPL DI, $0x01 // one compare feeds both branches below
18418  	JE   matchlen_match1_match_nolit_calcBlockSize
18419  	JB   match_nolit_end_calcBlockSize
18420  	MOVW (R8)(R10*1), R9
18421  	CMPW (SI)(R10*1), R9
18422  	JNE  matchlen_match1_match_nolit_calcBlockSize
18423  	LEAL 2(R10), R10
18424  	SUBL $0x02, DI
18425  	JZ   match_nolit_end_calcBlockSize
18426  
18427  matchlen_match1_match_nolit_calcBlockSize:
18428  	MOVB (R8)(R10*1), R9
18429  	CMPB (SI)(R10*1), R9
18430  	JNE  match_nolit_end_calcBlockSize
18431  	LEAL 1(R10), R10
18432  
18433  match_nolit_end_calcBlockSize:
18434  	ADDL R10, DX // DX = first position after the match
18435  	MOVL 16(SP), SI // SI = match offset
18436  	ADDL $0x04, R10 // R10 = full match length, including the first 4 bytes
18437  	MOVL DX, 12(SP) // everything before DX is now accounted for
18438  
18439  	// emitCopy
18440  	CMPL SI, $0x00010000 // offsets below 65536 take the two_byte_offset path
18441  	JB   two_byte_offset_match_nolit_calcBlockSize
18442  
18443  four_bytes_loop_back_match_nolit_calcBlockSize: // 5 bytes per 64 bytes of length
18444  	CMPL R10, $0x40
18445  	JBE  four_bytes_remain_match_nolit_calcBlockSize
18446  	LEAL -64(R10), R10
18447  	ADDQ $0x05, CX
18448  	CMPL R10, $0x04
18449  	JB   four_bytes_remain_match_nolit_calcBlockSize
18450  	JMP  four_bytes_loop_back_match_nolit_calcBlockSize
18451  
18452  four_bytes_remain_match_nolit_calcBlockSize:
18453  	TESTL R10, R10
18454  	JZ    match_nolit_emitcopy_end_calcBlockSize
18455  	XORL  SI, SI // SI is rewritten before its next read
18456  	ADDQ  $0x05, CX
18457  	JMP   match_nolit_emitcopy_end_calcBlockSize
18458  
18459  two_byte_offset_match_nolit_calcBlockSize: // 3 bytes per 60 bytes while length > 64
18460  	CMPL R10, $0x40
18461  	JBE  two_byte_offset_short_match_nolit_calcBlockSize
18462  	LEAL -60(R10), R10
18463  	ADDQ $0x03, CX
18464  	JMP  two_byte_offset_match_nolit_calcBlockSize
18465  
18466  two_byte_offset_short_match_nolit_calcBlockSize:
18467  	MOVL R10, DI
18468  	SHLL $0x02, DI // DI is reloaded or rewritten before its next read
18469  	CMPL R10, $0x0c
18470  	JAE  emit_copy_three_match_nolit_calcBlockSize
18471  	CMPL SI, $0x00000800
18472  	JAE  emit_copy_three_match_nolit_calcBlockSize
18473  	ADDQ $0x02, CX // length < 12 and offset < 2048: 2 bytes
18474  	JMP  match_nolit_emitcopy_end_calcBlockSize
18475  
18476  emit_copy_three_match_nolit_calcBlockSize:
18477  	ADDQ $0x03, CX
18478  
18479  match_nolit_emitcopy_end_calcBlockSize:
18480  	CMPL DX, 8(SP)
18481  	JAE  emit_remainder_calcBlockSize
18482  	MOVQ -2(BX)(DX*1), DI // DI = 8 bytes of src at DX-2
18483  	CMPQ CX, (SP)
18484  	JB   match_nolit_dst_ok_calcBlockSize
18485  	MOVQ $0x00000000, ret+32(FP) // limit reached: return 0
18486  	RET
18487  
18488  match_nolit_dst_ok_calcBlockSize:
18489  	MOVQ  $0x0000cf1bbcdcbf9b, R9 // hash multiplier
18490  	MOVQ  DI, R8
18491  	SHRQ  $0x10, DI // DI = bytes starting at DX
18492  	MOVQ  DI, SI
18493  	SHLQ  $0x10, R8
18494  	IMULQ R9, R8
18495  	SHRQ  $0x33, R8 // R8 = table index for DX-2
18496  	SHLQ  $0x10, SI
18497  	IMULQ R9, SI
18498  	SHRQ  $0x33, SI // SI = table index for DX
18499  	LEAL  -2(DX), R9
18500  	LEAQ  (AX)(SI*4), R10 // R10 = address of the table slot for DX
18501  	MOVL  (R10), SI // SI = previous candidate for DX
18502  	MOVL  R9, (AX)(R8*4) // record DX-2
18503  	MOVL  DX, (R10) // record DX
18504  	CMPL  (BX)(SI*1), DI // candidate matches the 4 bytes at DX: match with no literals
18505  	JEQ   match_nolit_loop_calcBlockSize
18506  	INCL  DX
18507  	JMP   search_loop_calcBlockSize
18508  
18509  emit_remainder_calcBlockSize: // AX is reused as scratch from here; the table is no longer needed
18510  	MOVQ src_len+8(FP), AX
18511  	SUBL 12(SP), AX // AX = bytes not yet accounted for
18512  	LEAQ 5(CX)(AX*1), AX // size so far + remaining literals + 5
18513  	CMPQ AX, (SP)
18514  	JB   emit_remainder_ok_calcBlockSize
18515  	MOVQ $0x00000000, ret+32(FP) // limit reached: return 0
18516  	RET
18517  
18518  emit_remainder_ok_calcBlockSize:
18519  	MOVQ src_len+8(FP), AX
18520  	MOVL 12(SP), DX
18521  	CMPL DX, AX
18522  	JEQ  emit_literal_done_emit_remainder_calcBlockSize
18523  	MOVL AX, SI
18524  	MOVL AX, 12(SP)
18525  	LEAQ (BX)(DX*1), AX // overwritten two instructions below without being read
18526  	SUBL DX, SI // SI = length of the final literal run
18527  	LEAL -1(SI), AX // header size is chosen from length-1
18528  	CMPL AX, $0x3c
18529  	JB   one_byte_emit_remainder_calcBlockSize
18530  	CMPL AX, $0x00000100
18531  	JB   two_bytes_emit_remainder_calcBlockSize
18532  	CMPL AX, $0x00010000
18533  	JB   three_bytes_emit_remainder_calcBlockSize
18534  	CMPL AX, $0x01000000
18535  	JB   four_bytes_emit_remainder_calcBlockSize
18536  	ADDQ $0x05, CX
18537  	JMP  memmove_long_emit_remainder_calcBlockSize
18538  
18539  four_bytes_emit_remainder_calcBlockSize:
18540  	ADDQ $0x04, CX
18541  	JMP  memmove_long_emit_remainder_calcBlockSize
18542  
18543  three_bytes_emit_remainder_calcBlockSize:
18544  	ADDQ $0x03, CX
18545  	JMP  memmove_long_emit_remainder_calcBlockSize
18546  
18547  two_bytes_emit_remainder_calcBlockSize:
18548  	ADDQ $0x02, CX
18549  	CMPL AX, $0x40 // both targets below add the same literal length
18550  	JB   memmove_emit_remainder_calcBlockSize
18551  	JMP  memmove_long_emit_remainder_calcBlockSize
18552  
18553  one_byte_emit_remainder_calcBlockSize:
18554  	ADDQ $0x01, CX
18555  
18556  memmove_emit_remainder_calcBlockSize:
18557  	LEAQ (CX)(SI*1), AX // count the literal bytes themselves
18558  	MOVQ AX, CX
18559  	JMP  emit_literal_done_emit_remainder_calcBlockSize
18560  
18561  memmove_long_emit_remainder_calcBlockSize:
18562  	LEAQ (CX)(SI*1), AX // count the literal bytes themselves
18563  	MOVQ AX, CX
18564  
18565  emit_literal_done_emit_remainder_calcBlockSize:
18566  	MOVQ CX, ret+32(FP) // return the accumulated size
18567  	RET
18568  
18569  // func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
18570  // Requires: BMI, SSE2
18571  TEXT ·calcBlockSizeSmall(SB), $24-40
18572  	MOVQ tmp+24(FP), AX
18573  	XORQ CX, CX
18574  	MOVQ $0x00000010, DX
18575  	MOVQ AX, BX
18576  	PXOR X0, X0
18577  
18578  zero_loop_calcBlockSizeSmall:
18579  	MOVOU X0, (BX)
18580  	MOVOU X0, 16(BX)
18581  	MOVOU X0, 32(BX)
18582  	MOVOU X0, 48(BX)
18583  	MOVOU X0, 64(BX)
18584  	MOVOU X0, 80(BX)
18585  	MOVOU X0, 96(BX)
18586  	MOVOU X0, 112(BX)
18587  	ADDQ  $0x80, BX
18588  	DECQ  DX
18589  	JNZ   zero_loop_calcBlockSizeSmall
18590  	MOVL  $0x00000000, 12(SP)
18591  	MOVQ  src_len+8(FP), DX
18592  	LEAQ  -9(DX), BX
18593  	LEAQ  -8(DX), SI
18594  	MOVL  SI, 8(SP)
18595  	SHRQ  $0x05, DX
18596  	SUBL  DX, BX
18597  	LEAQ  (CX)(BX*1), BX
18598  	MOVQ  BX, (SP)
18599  	MOVL  $0x00000001, DX
18600  	MOVL  DX, 16(SP)
18601  	MOVQ  src_base+0(FP), BX
18602  
18603  search_loop_calcBlockSizeSmall:
18604  	MOVL  DX, SI
18605  	SUBL  12(SP), SI
18606  	SHRL  $0x04, SI
18607  	LEAL  4(DX)(SI*1), SI
18608  	CMPL  SI, 8(SP)
18609  	JAE   emit_remainder_calcBlockSizeSmall
18610  	MOVQ  (BX)(DX*1), DI
18611  	MOVL  SI, 20(SP)
18612  	MOVQ  $0x9e3779b1, R9
18613  	MOVQ  DI, R10
18614  	MOVQ  DI, R11
18615  	SHRQ  $0x08, R11
18616  	SHLQ  $0x20, R10
18617  	IMULQ R9, R10
18618  	SHRQ  $0x37, R10
18619  	SHLQ  $0x20, R11
18620  	IMULQ R9, R11
18621  	SHRQ  $0x37, R11
18622  	MOVL  (AX)(R10*4), SI
18623  	MOVL  (AX)(R11*4), R8
18624  	MOVL  DX, (AX)(R10*4)
18625  	LEAL  1(DX), R10
18626  	MOVL  R10, (AX)(R11*4)
18627  	MOVQ  DI, R10
18628  	SHRQ  $0x10, R10
18629  	SHLQ  $0x20, R10
18630  	IMULQ R9, R10
18631  	SHRQ  $0x37, R10
18632  	MOVL  DX, R9
18633  	SUBL  16(SP), R9
18634  	MOVL  1(BX)(R9*1), R11
18635  	MOVQ  DI, R9
18636  	SHRQ  $0x08, R9
18637  	CMPL  R9, R11
18638  	JNE   no_repeat_found_calcBlockSizeSmall
18639  	LEAL  1(DX), DI
18640  	MOVL  12(SP), SI
18641  	MOVL  DI, R8
18642  	SUBL  16(SP), R8
18643  	JZ    repeat_extend_back_end_calcBlockSizeSmall
18644  
18645  repeat_extend_back_loop_calcBlockSizeSmall:
18646  	CMPL DI, SI
18647  	JBE  repeat_extend_back_end_calcBlockSizeSmall
18648  	MOVB -1(BX)(R8*1), R9
18649  	MOVB -1(BX)(DI*1), R10
18650  	CMPB R9, R10
18651  	JNE  repeat_extend_back_end_calcBlockSizeSmall
18652  	LEAL -1(DI), DI
18653  	DECL R8
18654  	JNZ  repeat_extend_back_loop_calcBlockSizeSmall
18655  
18656  repeat_extend_back_end_calcBlockSizeSmall:
18657  	MOVL DI, SI
18658  	SUBL 12(SP), SI
18659  	LEAQ 3(CX)(SI*1), SI
18660  	CMPQ SI, (SP)
18661  	JB   repeat_dst_size_check_calcBlockSizeSmall
18662  	MOVQ $0x00000000, ret+32(FP)
18663  	RET
18664  
18665  repeat_dst_size_check_calcBlockSizeSmall:
18666  	MOVL 12(SP), SI
18667  	CMPL SI, DI
18668  	JEQ  emit_literal_done_repeat_emit_calcBlockSizeSmall
18669  	MOVL DI, R8
18670  	MOVL DI, 12(SP)
18671  	LEAQ (BX)(SI*1), R9
18672  	SUBL SI, R8
18673  	LEAL -1(R8), SI
18674  	CMPL SI, $0x3c
18675  	JB   one_byte_repeat_emit_calcBlockSizeSmall
18676  	CMPL SI, $0x00000100
18677  	JB   two_bytes_repeat_emit_calcBlockSizeSmall
18678  	JB   three_bytes_repeat_emit_calcBlockSizeSmall
18679  
18680  three_bytes_repeat_emit_calcBlockSizeSmall:
18681  	ADDQ $0x03, CX
18682  	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
18683  
18684  two_bytes_repeat_emit_calcBlockSizeSmall:
18685  	ADDQ $0x02, CX
18686  	CMPL SI, $0x40
18687  	JB   memmove_repeat_emit_calcBlockSizeSmall
18688  	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
18689  
18690  one_byte_repeat_emit_calcBlockSizeSmall:
18691  	ADDQ $0x01, CX
18692  
18693  memmove_repeat_emit_calcBlockSizeSmall:
18694  	LEAQ (CX)(R8*1), CX
18695  	JMP  emit_literal_done_repeat_emit_calcBlockSizeSmall
18696  
18697  memmove_long_repeat_emit_calcBlockSizeSmall:
18698  	LEAQ (CX)(R8*1), CX
18699  
18700  emit_literal_done_repeat_emit_calcBlockSizeSmall:
18701  	ADDL $0x05, DX
18702  	MOVL DX, SI
18703  	SUBL 16(SP), SI
18704  	MOVQ src_len+8(FP), R8
18705  	SUBL DX, R8
18706  	LEAQ (BX)(DX*1), R9
18707  	LEAQ (BX)(SI*1), SI
18708  
18709  	// matchLen
18710  	XORL R11, R11
18711  
18712  matchlen_loopback_16_repeat_extend_calcBlockSizeSmall:
18713  	CMPL R8, $0x10
18714  	JB   matchlen_match8_repeat_extend_calcBlockSizeSmall
18715  	MOVQ (R9)(R11*1), R10
18716  	MOVQ 8(R9)(R11*1), R12
18717  	XORQ (SI)(R11*1), R10
18718  	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
18719  	XORQ 8(SI)(R11*1), R12
18720  	JNZ  matchlen_bsf_16repeat_extend_calcBlockSizeSmall
18721  	LEAL -16(R8), R8
18722  	LEAL 16(R11), R11
18723  	JMP  matchlen_loopback_16_repeat_extend_calcBlockSizeSmall
18724  
18725  matchlen_bsf_16repeat_extend_calcBlockSizeSmall:
18726  #ifdef GOAMD64_v3
18727  	TZCNTQ R12, R12
18728  
18729  #else
18730  	BSFQ R12, R12
18731  
18732  #endif
18733  	SARQ $0x03, R12
18734  	LEAL 8(R11)(R12*1), R11
18735  	JMP  repeat_extend_forward_end_calcBlockSizeSmall
18736  
18737  matchlen_match8_repeat_extend_calcBlockSizeSmall:
18738  	CMPL R8, $0x08
18739  	JB   matchlen_match4_repeat_extend_calcBlockSizeSmall
18740  	MOVQ (R9)(R11*1), R10
18741  	XORQ (SI)(R11*1), R10
18742  	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
18743  	LEAL -8(R8), R8
18744  	LEAL 8(R11), R11
18745  	JMP  matchlen_match4_repeat_extend_calcBlockSizeSmall
18746  
18747  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall:
18748  #ifdef GOAMD64_v3
18749  	TZCNTQ R10, R10
18750  
18751  #else
18752  	BSFQ R10, R10
18753  
18754  #endif
18755  	SARQ $0x03, R10
18756  	LEAL (R11)(R10*1), R11
18757  	JMP  repeat_extend_forward_end_calcBlockSizeSmall
18758  
18759  matchlen_match4_repeat_extend_calcBlockSizeSmall:
18760  	CMPL R8, $0x04
18761  	JB   matchlen_match2_repeat_extend_calcBlockSizeSmall
18762  	MOVL (R9)(R11*1), R10
18763  	CMPL (SI)(R11*1), R10
18764  	JNE  matchlen_match2_repeat_extend_calcBlockSizeSmall
18765  	LEAL -4(R8), R8
18766  	LEAL 4(R11), R11
18767  
18768  matchlen_match2_repeat_extend_calcBlockSizeSmall:
18769  	CMPL R8, $0x01
18770  	JE   matchlen_match1_repeat_extend_calcBlockSizeSmall
18771  	JB   repeat_extend_forward_end_calcBlockSizeSmall
18772  	MOVW (R9)(R11*1), R10
18773  	CMPW (SI)(R11*1), R10
18774  	JNE  matchlen_match1_repeat_extend_calcBlockSizeSmall
18775  	LEAL 2(R11), R11
18776  	SUBL $0x02, R8
18777  	JZ   repeat_extend_forward_end_calcBlockSizeSmall
18778  
18779  matchlen_match1_repeat_extend_calcBlockSizeSmall:
18780  	MOVB (R9)(R11*1), R10
18781  	CMPB (SI)(R11*1), R10
18782  	JNE  repeat_extend_forward_end_calcBlockSizeSmall
18783  	LEAL 1(R11), R11
18784  
18785  repeat_extend_forward_end_calcBlockSizeSmall:
18786  	ADDL R11, DX
18787  	MOVL DX, SI
18788  	SUBL DI, SI
18789  	MOVL 16(SP), DI
18790  
18791  	// emitCopy
18792  two_byte_offset_repeat_as_copy_calcBlockSizeSmall:
18793  	CMPL SI, $0x40
18794  	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall
18795  	LEAL -60(SI), SI
18796  	ADDQ $0x03, CX
18797  	JMP  two_byte_offset_repeat_as_copy_calcBlockSizeSmall
18798  
18799  two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall:
18800  	MOVL SI, DI
18801  	SHLL $0x02, DI
18802  	CMPL SI, $0x0c
18803  	JAE  emit_copy_three_repeat_as_copy_calcBlockSizeSmall
18804  	ADDQ $0x02, CX
18805  	JMP  repeat_end_emit_calcBlockSizeSmall
18806  
18807  emit_copy_three_repeat_as_copy_calcBlockSizeSmall:
18808  	ADDQ $0x03, CX
18809  
18810  repeat_end_emit_calcBlockSizeSmall:
18811  	MOVL DX, 12(SP)
18812  	JMP  search_loop_calcBlockSizeSmall
18813  
18814  no_repeat_found_calcBlockSizeSmall:
18815  	CMPL (BX)(SI*1), DI
18816  	JEQ  candidate_match_calcBlockSizeSmall
18817  	SHRQ $0x08, DI
18818  	MOVL (AX)(R10*4), SI
18819  	LEAL 2(DX), R9
18820  	CMPL (BX)(R8*1), DI
18821  	JEQ  candidate2_match_calcBlockSizeSmall
18822  	MOVL R9, (AX)(R10*4)
18823  	SHRQ $0x08, DI
18824  	CMPL (BX)(SI*1), DI
18825  	JEQ  candidate3_match_calcBlockSizeSmall
18826  	MOVL 20(SP), DX
18827  	JMP  search_loop_calcBlockSizeSmall
18828  
18829  candidate3_match_calcBlockSizeSmall:
18830  	ADDL $0x02, DX
18831  	JMP  candidate_match_calcBlockSizeSmall
18832  
18833  candidate2_match_calcBlockSizeSmall:
18834  	MOVL R9, (AX)(R10*4)
18835  	INCL DX
18836  	MOVL R8, SI
18837  
18838  candidate_match_calcBlockSizeSmall:
18839  	MOVL  12(SP), DI
18840  	TESTL SI, SI
18841  	JZ    match_extend_back_end_calcBlockSizeSmall
18842  
18843  match_extend_back_loop_calcBlockSizeSmall:
18844  	CMPL DX, DI
18845  	JBE  match_extend_back_end_calcBlockSizeSmall
18846  	MOVB -1(BX)(SI*1), R8
18847  	MOVB -1(BX)(DX*1), R9
18848  	CMPB R8, R9
18849  	JNE  match_extend_back_end_calcBlockSizeSmall
18850  	LEAL -1(DX), DX
18851  	DECL SI
18852  	JZ   match_extend_back_end_calcBlockSizeSmall
18853  	JMP  match_extend_back_loop_calcBlockSizeSmall
18854  
18855  match_extend_back_end_calcBlockSizeSmall:
18856  	MOVL DX, DI
18857  	SUBL 12(SP), DI
18858  	LEAQ 3(CX)(DI*1), DI
18859  	CMPQ DI, (SP)
18860  	JB   match_dst_size_check_calcBlockSizeSmall
18861  	MOVQ $0x00000000, ret+32(FP)
18862  	RET
18863  
18864  match_dst_size_check_calcBlockSizeSmall:
18865  	MOVL DX, DI
18866  	MOVL 12(SP), R8
18867  	CMPL R8, DI
18868  	JEQ  emit_literal_done_match_emit_calcBlockSizeSmall
18869  	MOVL DI, R9
18870  	MOVL DI, 12(SP)
18871  	LEAQ (BX)(R8*1), DI
18872  	SUBL R8, R9
18873  	LEAL -1(R9), DI
18874  	CMPL DI, $0x3c
18875  	JB   one_byte_match_emit_calcBlockSizeSmall
18876  	CMPL DI, $0x00000100
18877  	JB   two_bytes_match_emit_calcBlockSizeSmall
18878  	JB   three_bytes_match_emit_calcBlockSizeSmall
18879  
18880  three_bytes_match_emit_calcBlockSizeSmall:
18881  	ADDQ $0x03, CX
18882  	JMP  memmove_long_match_emit_calcBlockSizeSmall
18883  
18884  two_bytes_match_emit_calcBlockSizeSmall:
18885  	ADDQ $0x02, CX
18886  	CMPL DI, $0x40
18887  	JB   memmove_match_emit_calcBlockSizeSmall
18888  	JMP  memmove_long_match_emit_calcBlockSizeSmall
18889  
18890  one_byte_match_emit_calcBlockSizeSmall:
18891  	ADDQ $0x01, CX
18892  
18893  memmove_match_emit_calcBlockSizeSmall:
18894  	LEAQ (CX)(R9*1), CX
18895  	JMP  emit_literal_done_match_emit_calcBlockSizeSmall
18896  
18897  memmove_long_match_emit_calcBlockSizeSmall:
18898  	LEAQ (CX)(R9*1), CX
18899  
18900  emit_literal_done_match_emit_calcBlockSizeSmall:
18901  match_nolit_loop_calcBlockSizeSmall:
18902  	MOVL DX, DI
18903  	SUBL SI, DI
18904  	MOVL DI, 16(SP)
18905  	ADDL $0x04, DX
18906  	ADDL $0x04, SI
18907  	MOVQ src_len+8(FP), DI
18908  	SUBL DX, DI
18909  	LEAQ (BX)(DX*1), R8
18910  	LEAQ (BX)(SI*1), SI
18911  
18912  	// matchLen
18913  	XORL R10, R10
18914  
18915  matchlen_loopback_16_match_nolit_calcBlockSizeSmall:
18916  	CMPL DI, $0x10
18917  	JB   matchlen_match8_match_nolit_calcBlockSizeSmall
18918  	MOVQ (R8)(R10*1), R9
18919  	MOVQ 8(R8)(R10*1), R11
18920  	XORQ (SI)(R10*1), R9
18921  	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
18922  	XORQ 8(SI)(R10*1), R11
18923  	JNZ  matchlen_bsf_16match_nolit_calcBlockSizeSmall
18924  	LEAL -16(DI), DI
18925  	LEAL 16(R10), R10
18926  	JMP  matchlen_loopback_16_match_nolit_calcBlockSizeSmall
18927  
18928  matchlen_bsf_16match_nolit_calcBlockSizeSmall:
18929  #ifdef GOAMD64_v3
18930  	TZCNTQ R11, R11
18931  
18932  #else
18933  	BSFQ R11, R11
18934  
18935  #endif
18936  	SARQ $0x03, R11
18937  	LEAL 8(R10)(R11*1), R10
18938  	JMP  match_nolit_end_calcBlockSizeSmall
18939  
18940  matchlen_match8_match_nolit_calcBlockSizeSmall:
18941  	CMPL DI, $0x08
18942  	JB   matchlen_match4_match_nolit_calcBlockSizeSmall
18943  	MOVQ (R8)(R10*1), R9
18944  	XORQ (SI)(R10*1), R9
18945  	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
18946  	LEAL -8(DI), DI
18947  	LEAL 8(R10), R10
18948  	JMP  matchlen_match4_match_nolit_calcBlockSizeSmall
18949  
18950  matchlen_bsf_8_match_nolit_calcBlockSizeSmall:
18951  #ifdef GOAMD64_v3
18952  	TZCNTQ R9, R9
18953  
18954  #else
18955  	BSFQ R9, R9
18956  
18957  #endif
18958  	SARQ $0x03, R9
18959  	LEAL (R10)(R9*1), R10
18960  	JMP  match_nolit_end_calcBlockSizeSmall
18961  
18962  matchlen_match4_match_nolit_calcBlockSizeSmall:
18963  	CMPL DI, $0x04
18964  	JB   matchlen_match2_match_nolit_calcBlockSizeSmall
18965  	MOVL (R8)(R10*1), R9
18966  	CMPL (SI)(R10*1), R9
18967  	JNE  matchlen_match2_match_nolit_calcBlockSizeSmall
18968  	LEAL -4(DI), DI
18969  	LEAL 4(R10), R10
18970  
18971  matchlen_match2_match_nolit_calcBlockSizeSmall:
18972  	CMPL DI, $0x01
18973  	JE   matchlen_match1_match_nolit_calcBlockSizeSmall
18974  	JB   match_nolit_end_calcBlockSizeSmall
18975  	MOVW (R8)(R10*1), R9
18976  	CMPW (SI)(R10*1), R9
18977  	JNE  matchlen_match1_match_nolit_calcBlockSizeSmall
18978  	LEAL 2(R10), R10
18979  	SUBL $0x02, DI
18980  	JZ   match_nolit_end_calcBlockSizeSmall
18981  
18982  matchlen_match1_match_nolit_calcBlockSizeSmall:
18983  	MOVB (R8)(R10*1), R9
18984  	CMPB (SI)(R10*1), R9
18985  	JNE  match_nolit_end_calcBlockSizeSmall
18986  	LEAL 1(R10), R10
18987  
18988  match_nolit_end_calcBlockSizeSmall:
18989  	ADDL R10, DX
18990  	MOVL 16(SP), SI
18991  	ADDL $0x04, R10
18992  	MOVL DX, 12(SP)
18993  
18994  	// emitCopy
18995  two_byte_offset_match_nolit_calcBlockSizeSmall:
18996  	CMPL R10, $0x40
18997  	JBE  two_byte_offset_short_match_nolit_calcBlockSizeSmall
18998  	LEAL -60(R10), R10
18999  	ADDQ $0x03, CX
19000  	JMP  two_byte_offset_match_nolit_calcBlockSizeSmall
19001  
19002  two_byte_offset_short_match_nolit_calcBlockSizeSmall:
19003  	MOVL R10, SI
19004  	SHLL $0x02, SI
19005  	CMPL R10, $0x0c
19006  	JAE  emit_copy_three_match_nolit_calcBlockSizeSmall
19007  	ADDQ $0x02, CX
19008  	JMP  match_nolit_emitcopy_end_calcBlockSizeSmall
19009  
19010  emit_copy_three_match_nolit_calcBlockSizeSmall:
19011  	ADDQ $0x03, CX
19012  
19013  match_nolit_emitcopy_end_calcBlockSizeSmall:
19014  	CMPL DX, 8(SP)
19015  	JAE  emit_remainder_calcBlockSizeSmall
19016  	MOVQ -2(BX)(DX*1), DI
19017  	CMPQ CX, (SP)
19018  	JB   match_nolit_dst_ok_calcBlockSizeSmall
19019  	MOVQ $0x00000000, ret+32(FP)
19020  	RET
19021  
19022  match_nolit_dst_ok_calcBlockSizeSmall:
19023  	MOVQ  $0x9e3779b1, R9
19024  	MOVQ  DI, R8
19025  	SHRQ  $0x10, DI
19026  	MOVQ  DI, SI
19027  	SHLQ  $0x20, R8
19028  	IMULQ R9, R8
19029  	SHRQ  $0x37, R8
19030  	SHLQ  $0x20, SI
19031  	IMULQ R9, SI
19032  	SHRQ  $0x37, SI
19033  	LEAL  -2(DX), R9
19034  	LEAQ  (AX)(SI*4), R10
19035  	MOVL  (R10), SI
19036  	MOVL  R9, (AX)(R8*4)
19037  	MOVL  DX, (R10)
19038  	CMPL  (BX)(SI*1), DI
19039  	JEQ   match_nolit_loop_calcBlockSizeSmall
19040  	INCL  DX
19041  	JMP   search_loop_calcBlockSizeSmall
19042  
19043  emit_remainder_calcBlockSizeSmall:
19044  	MOVQ src_len+8(FP), AX
19045  	SUBL 12(SP), AX
19046  	LEAQ 3(CX)(AX*1), AX
19047  	CMPQ AX, (SP)
19048  	JB   emit_remainder_ok_calcBlockSizeSmall
19049  	MOVQ $0x00000000, ret+32(FP)
19050  	RET
19051  
19052  emit_remainder_ok_calcBlockSizeSmall:
19053  	MOVQ src_len+8(FP), AX
19054  	MOVL 12(SP), DX
19055  	CMPL DX, AX
19056  	JEQ  emit_literal_done_emit_remainder_calcBlockSizeSmall
19057  	MOVL AX, SI
19058  	MOVL AX, 12(SP)
19059  	LEAQ (BX)(DX*1), AX
19060  	SUBL DX, SI
19061  	LEAL -1(SI), AX
19062  	CMPL AX, $0x3c
19063  	JB   one_byte_emit_remainder_calcBlockSizeSmall
19064  	CMPL AX, $0x00000100
19065  	JB   two_bytes_emit_remainder_calcBlockSizeSmall
19066  	JB   three_bytes_emit_remainder_calcBlockSizeSmall
19067  
19068  three_bytes_emit_remainder_calcBlockSizeSmall:
19069  	ADDQ $0x03, CX
19070  	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
19071  
19072  two_bytes_emit_remainder_calcBlockSizeSmall:
19073  	ADDQ $0x02, CX
19074  	CMPL AX, $0x40
19075  	JB   memmove_emit_remainder_calcBlockSizeSmall
19076  	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
19077  
19078  one_byte_emit_remainder_calcBlockSizeSmall:
19079  	ADDQ $0x01, CX
19080  
19081  memmove_emit_remainder_calcBlockSizeSmall:
19082  	LEAQ (CX)(SI*1), AX
19083  	MOVQ AX, CX
19084  	JMP  emit_literal_done_emit_remainder_calcBlockSizeSmall
19085  
19086  memmove_long_emit_remainder_calcBlockSizeSmall:
19087  	LEAQ (CX)(SI*1), AX
19088  	MOVQ AX, CX
19089  
19090  emit_literal_done_emit_remainder_calcBlockSizeSmall:
19091  	MOVQ CX, ret+32(FP)
19092  	RET
19093  
19094  // func emitLiteral(dst []byte, lit []byte) int
19095  // Requires: SSE2
//
// emitLiteral writes a literal header for len(lit) bytes at dst, copies lit
// right after the header and returns the number of bytes written
// (header size + len(lit)). When len(lit) == 0 nothing is written and 0 is
// returned.
//
// Header layout, with n = len(lit)-1 (multi-byte values are stored
// little-endian by the x86 stores below):
//   n < 60       1 byte : n<<2
//   n < 1<<8     2 bytes: 0xf0, n
//   n < 1<<16    3 bytes: 0xf4, n as 16 bits
//   n < 1<<24    4 bytes: 0xf8, low 24 bits of n
//   otherwise    5 bytes: 0xfc, n as 32 bits
//
// dst_len is never read and no bounds check is performed here.
// NOTE(review): presumably the caller guarantees dst is large enough — confirm.
//
// Register roles:
//   AX = write cursor in dst      CX = lit base pointer
//   DX = len(lit)                 BX = return value (len(lit) + header size)
//   SI = len(lit)-1, later scratch
19096  TEXT ·emitLiteral(SB), NOSPLIT, $0-56
19097  	MOVQ  lit_len+32(FP), DX
19098  	MOVQ  dst_base+0(FP), AX
19099  	MOVQ  lit_base+24(FP), CX
19100  	TESTQ DX, DX
19101  	JZ    emit_literal_end_standalone_skip // empty literal: return 0
19102  	MOVL  DX, BX // BX = low 32 bits of len(lit); header size is added below
19103  	LEAL  -1(DX), SI // SI = n = len(lit)-1
19104  	CMPL  SI, $0x3c
19105  	JB    one_byte_standalone
19106  	CMPL  SI, $0x00000100
19107  	JB    two_bytes_standalone
19108  	CMPL  SI, $0x00010000
19109  	JB    three_bytes_standalone
19110  	CMPL  SI, $0x01000000
19111  	JB    four_bytes_standalone
	// Five-byte header: 0xfc followed by n as a 32-bit value.
19112  	MOVB  $0xfc, (AX)
19113  	MOVL  SI, 1(AX)
19114  	ADDQ  $0x05, BX
19115  	ADDQ  $0x05, AX
19116  	JMP   memmove_long_standalone
19117  
// Four-byte header: 0xf8 followed by the low 24 bits of n.
19118  four_bytes_standalone:
19119  	MOVL SI, DI
19120  	SHRL $0x10, DI // DI = bits 16.. of n; its low byte is stored at offset 3
19121  	MOVB $0xf8, (AX)
19122  	MOVW SI, 1(AX)
19123  	MOVB DI, 3(AX)
19124  	ADDQ $0x04, BX
19125  	ADDQ $0x04, AX
19126  	JMP  memmove_long_standalone
19127  
// Three-byte header: 0xf4 followed by n as 16 bits.
19128  three_bytes_standalone:
19129  	MOVB $0xf4, (AX)
19130  	MOVW SI, 1(AX)
19131  	ADDQ $0x03, BX
19132  	ADDQ $0x03, AX
19133  	JMP  memmove_long_standalone
19134  
// Two-byte header: 0xf0 followed by n as one byte. This is the only header
// size that can reach either copy routine: n < 64 (len <= 64) uses the short
// copy, anything longer uses the long copy.
19135  two_bytes_standalone:
19136  	MOVB $0xf0, (AX)
19137  	MOVB SI, 1(AX)
19138  	ADDQ $0x02, BX
19139  	ADDQ $0x02, AX
19140  	CMPL SI, $0x40
19141  	JB   memmove_standalone
19142  	JMP  memmove_long_standalone
19143  
// One-byte header: n<<2. Falls through into the short copy (len <= 60).
19144  one_byte_standalone:
19145  	SHLB $0x02, SI
19146  	MOVB SI, (AX)
19147  	ADDQ $0x01, BX
19148  	ADDQ $0x01, AX
19149  
// Short copy for 1..64 bytes. Each size class loads the head and the tail of
// lit (the two may overlap) before storing either, so every length in the
// class is covered without a loop.
19150  memmove_standalone:
19151  	// genMemMoveShort
19152  	CMPQ DX, $0x03
19153  	JB   emit_lit_memmove_standalone_memmove_move_1or2
19154  	JE   emit_lit_memmove_standalone_memmove_move_3
19155  	CMPQ DX, $0x08
19156  	JB   emit_lit_memmove_standalone_memmove_move_4through7
19157  	CMPQ DX, $0x10
19158  	JBE  emit_lit_memmove_standalone_memmove_move_8through16
19159  	CMPQ DX, $0x20
19160  	JBE  emit_lit_memmove_standalone_memmove_move_17through32
19161  	JMP  emit_lit_memmove_standalone_memmove_move_33through64
19162  
19163  emit_lit_memmove_standalone_memmove_move_1or2:
19164  	MOVB (CX), SI
19165  	MOVB -1(CX)(DX*1), CL // last use of CX as a pointer; its low byte becomes scratch
19166  	MOVB SI, (AX)
19167  	MOVB CL, -1(AX)(DX*1)
19168  	JMP  emit_literal_end_standalone
19169  
19170  emit_lit_memmove_standalone_memmove_move_3:
19171  	MOVW (CX), SI
19172  	MOVB 2(CX), CL // last use of CX as a pointer; its low byte becomes scratch
19173  	MOVW SI, (AX)
19174  	MOVB CL, 2(AX)
19175  	JMP  emit_literal_end_standalone
19176  
19177  emit_lit_memmove_standalone_memmove_move_4through7:
19178  	MOVL (CX), SI
19179  	MOVL -4(CX)(DX*1), CX // CX is overwritten with the tail dword
19180  	MOVL SI, (AX)
19181  	MOVL CX, -4(AX)(DX*1)
19182  	JMP  emit_literal_end_standalone
19183  
19184  emit_lit_memmove_standalone_memmove_move_8through16:
19185  	MOVQ (CX), SI
19186  	MOVQ -8(CX)(DX*1), CX // CX is overwritten with the tail qword
19187  	MOVQ SI, (AX)
19188  	MOVQ CX, -8(AX)(DX*1)
19189  	JMP  emit_literal_end_standalone
19190  
19191  emit_lit_memmove_standalone_memmove_move_17through32:
19192  	MOVOU (CX), X0
19193  	MOVOU -16(CX)(DX*1), X1
19194  	MOVOU X0, (AX)
19195  	MOVOU X1, -16(AX)(DX*1)
19196  	JMP   emit_literal_end_standalone
19197  
19198  emit_lit_memmove_standalone_memmove_move_33through64:
19199  	MOVOU (CX), X0
19200  	MOVOU 16(CX), X1
19201  	MOVOU -32(CX)(DX*1), X2
19202  	MOVOU -16(CX)(DX*1), X3
19203  	MOVOU X0, (AX)
19204  	MOVOU X1, 16(AX)
19205  	MOVOU X2, -32(AX)(DX*1)
19206  	MOVOU X3, -16(AX)(DX*1)
19207  	JMP   emit_literal_end_standalone
19208  	JMP emit_literal_end_standalone // unreachable: follows an unconditional JMP with no label in between
19209  
// Long copy. The first and last 32 bytes of lit are captured in X0..X3 up
// front and stored last with unaligned stores; the middle is moved in 32-byte
// chunks using aligned (MOVOA) stores.
19210  memmove_long_standalone:
19211  	// genMemMoveLong
19212  	MOVOU (CX), X0
19213  	MOVOU 16(CX), X1
19214  	MOVOU -32(CX)(DX*1), X2
19215  	MOVOU -16(CX)(DX*1), X3
19216  	MOVQ  DX, DI
19217  	SHRQ  $0x05, DI // DI = len / 32
19218  	MOVQ  AX, SI
19219  	ANDL  $0x0000001f, SI // SI = dst cursor & 31
19220  	MOVQ  $0x00000040, R8
19221  	SUBQ  SI, R8 // R8 = 64 - (dst cursor & 31), so AX+R8-32 is 32-byte aligned
19222  	DECQ  DI
	// DECQ leaves CF unchanged, so JA sees CF from the SUBQ above combined
	// with ZF from the DECQ.
19223  	JA    emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
19224  	LEAQ  -32(CX)(R8*1), SI
19225  	LEAQ  -32(AX)(R8*1), R9
19226  
// Copies 32 bytes per iteration from SI to the aligned address in R9 and
// advances R8 in step. As above, JNA combines ZF from DECQ with CF left by
// the preceding ADDQ.
19227  emit_lit_memmove_long_standalonelarge_big_loop_back:
19228  	MOVOU (SI), X4
19229  	MOVOU 16(SI), X5
19230  	MOVOA X4, (R9)
19231  	MOVOA X5, 16(R9)
19232  	ADDQ  $0x20, R9
19233  	ADDQ  $0x20, SI
19234  	ADDQ  $0x20, R8
19235  	DECQ  DI
19236  	JNA   emit_lit_memmove_long_standalonelarge_big_loop_back
19237  
// Copies the 32-byte chunk that ends at offset R8, then advances R8 by 32;
// repeats while len(lit) >= R8. Afterwards the saved head and tail are stored.
19238  emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
19239  	MOVOU -32(CX)(R8*1), X4
19240  	MOVOU -16(CX)(R8*1), X5
19241  	MOVOA X4, -32(AX)(R8*1)
19242  	MOVOA X5, -16(AX)(R8*1)
19243  	ADDQ  $0x20, R8
19244  	CMPQ  DX, R8
19245  	JAE   emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
19246  	MOVOU X0, (AX)
19247  	MOVOU X1, 16(AX)
19248  	MOVOU X2, -32(AX)(DX*1)
19249  	MOVOU X3, -16(AX)(DX*1)
19250  	JMP   emit_literal_end_standalone
19251  	JMP emit_literal_end_standalone // unreachable: follows an unconditional JMP with no label in between
19252  
// Empty input: report zero bytes written.
19253  emit_literal_end_standalone_skip:
19254  	XORQ BX, BX
19255  
19256  emit_literal_end_standalone:
19257  	MOVQ BX, ret+48(FP)
19258  	RET
19259  
19260  // func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes a repeat operation covering `length` bytes at dst and
// returns the number of bytes written. offset is only consulted to select the
// two-byte form that embeds it (length 9..11 with offset < 2048).
//
// Encodings chosen below, with L = length (32-bit arithmetic; multi-byte
// values are stored little-endian):
//   L <= 8                       2 bytes: 16-bit word ((L-4)<<2)|1
//   9 <= L <= 11, offset < 2048  2 bytes: (((L-4)<<2)+1) | ((offset>>8)<<5), offset&0xff
//   L-4 < 260                    3 bytes: 0x15, 0x00, L-8
//   L-4 < 65792                  4 bytes: 0x19, 0x00, (L-4-256) as 16 bits
//   L-4 < 0x0100ffff             5 bytes: 0x1d, 0x00, (L-4-65536) as 24 bits
//   otherwise                    5 bytes: 0x1d, 0x00, 0xfb, 0xff, 0xff, then the
//                                loop restarts with (L-4)-16842747 as the new length
//
// dst_len is never read and no bounds check is performed here.
// NOTE(review): presumably the caller guarantees dst is large enough — confirm.
//
// Register roles:
//   AX = write cursor in dst   BX = bytes written (return value)
//   CX = offset                DX = length, then length-4 and derived values
19261  TEXT ·emitRepeat(SB), NOSPLIT, $0-48
19262  	XORQ BX, BX
19263  	MOVQ dst_base+0(FP), AX
19264  	MOVQ offset+24(FP), CX
19265  	MOVQ length+32(FP), DX
19266  
19267  	// emitRepeat
19268  emit_repeat_again_standalone:
19269  	MOVL DX, SI // SI keeps the unmodified length for the range checks
19270  	LEAL -4(DX), DX // DX = length-4
19271  	CMPL SI, $0x08
19272  	JBE  repeat_two_standalone
19273  	CMPL SI, $0x0c
19274  	JAE  cant_repeat_two_offset_standalone
19275  	CMPL CX, $0x00000800
19276  	JB   repeat_two_offset_standalone
19277  
// Longer forms: pick the smallest header whose length field can hold DX.
19278  cant_repeat_two_offset_standalone:
19279  	CMPL DX, $0x00000104
19280  	JB   repeat_three_standalone
19281  	CMPL DX, $0x00010100
19282  	JB   repeat_four_standalone
19283  	CMPL DX, $0x0100ffff
19284  	JB   repeat_five_standalone
	// Too long for a single five-byte form: emit 0x1d 0x00 0xfb 0xff 0xff,
	// subtract 16842747 from DX and go around again with DX as the length.
19285  	LEAL -16842747(DX), DX
19286  	MOVL $0xfffb001d, (AX)
19287  	MOVB $0xff, 4(AX)
19288  	ADDQ $0x05, AX
19289  	ADDQ $0x05, BX
19290  	JMP  emit_repeat_again_standalone
19291  
// Five bytes: 0x1d, 0x00, then DX-65536 as 24 bits.
19292  repeat_five_standalone:
19293  	LEAL -65536(DX), DX
19294  	MOVL DX, CX // offset is no longer needed; CX becomes scratch
19295  	MOVW $0x001d, (AX)
19296  	MOVW DX, 2(AX)
19297  	SARL $0x10, CX
19298  	MOVB CL, 4(AX)
19299  	ADDQ $0x05, BX
19300  	ADDQ $0x05, AX
19301  	JMP  gen_emit_repeat_end
19302  
// Four bytes: 0x19, 0x00, then DX-256 as 16 bits.
19303  repeat_four_standalone:
19304  	LEAL -256(DX), DX
19305  	MOVW $0x0019, (AX)
19306  	MOVW DX, 2(AX)
19307  	ADDQ $0x04, BX
19308  	ADDQ $0x04, AX
19309  	JMP  gen_emit_repeat_end
19310  
// Three bytes: 0x15, 0x00, then the low byte of DX-4 (= length-8).
19311  repeat_three_standalone:
19312  	LEAL -4(DX), DX
19313  	MOVW $0x0015, (AX)
19314  	MOVB DL, 2(AX)
19315  	ADDQ $0x03, BX
19316  	ADDQ $0x03, AX
19317  	JMP  gen_emit_repeat_end
19318  
// Two bytes: the 16-bit word (DX<<2)|1, where DX = length-4.
19319  repeat_two_standalone:
19320  	SHLL $0x02, DX
19321  	ORL  $0x01, DX
19322  	MOVW DX, (AX)
19323  	ADDQ $0x02, BX
19324  	ADDQ $0x02, AX
19325  	JMP  gen_emit_repeat_end
19326  
// Two bytes with embedded offset: byte 0 = (1 + DX*4) | ((offset>>8)<<5),
// byte 1 = low byte of offset.
19327  repeat_two_offset_standalone:
19328  	XORQ SI, SI
19329  	LEAL 1(SI)(DX*4), DX // SI is zero, so this is 1 + DX*4
19330  	MOVB CL, 1(AX)
19331  	SARL $0x08, CX
19332  	SHLL $0x05, CX
19333  	ORL  CX, DX
19334  	MOVB DL, (AX)
19335  	ADDQ $0x02, BX
19336  	ADDQ $0x02, AX
19337  
19338  gen_emit_repeat_end:
19339  	MOVQ BX, ret+40(FP)
19340  	RET
19341  
19342  // func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes a copy operation for (offset, length) at dst and returns
// the number of bytes written. Lengths above 64 are emitted as one copy
// followed by a repeat operation for the remainder; the repeat encoder is
// inlined three times below (label suffixes _emit_copy, _emit_copy_short_2b
// and _emit_copy_short).
//
// Paths (32-bit arithmetic; multi-byte values are stored little-endian):
//   offset >= 65536, length <= 64 : 5 bytes: length*4-1, offset as 32 bits
//                                   (nothing is written when length == 0)
//   offset >= 65536, length > 64  : 5 bytes: 0xff, offset as 32 bits (covers 64);
//                                   the rest is a 5-byte copy when fewer than
//                                   4 bytes remain, otherwise a repeat
//   offset < 65536,  length <= 64 : 2 bytes when length < 12 and offset < 2048:
//                                   (length*4-15) | ((offset>>8)<<5), offset&0xff;
//                                   otherwise 3 bytes: length*4-2, offset as 16 bits
//   offset < 2048,   length > 64  : 2-byte copy with byte 0 = 17 | ((offset>>8)<<5)
//                                   (covers 8), then a repeat for length-8
//   2048 <= offset < 65536, length > 64
//                                 : 3 bytes: 0xee, offset as 16 bits (covers 60),
//                                   then a repeat for length-60
//
// dst_len is never read and no bounds check is performed here.
// NOTE(review): presumably the caller guarantees dst is large enough — confirm.
//
// Register roles:
//   AX = write cursor in dst   BX = bytes written (return value)
//   CX = offset                DX = remaining length
19343  TEXT ·emitCopy(SB), NOSPLIT, $0-48
19344  	XORQ BX, BX
19345  	MOVQ dst_base+0(FP), AX
19346  	MOVQ offset+24(FP), CX
19347  	MOVQ length+32(FP), DX
19348  
19349  	// emitCopy
19350  	CMPL CX, $0x00010000
19351  	JB   two_byte_offset_standalone
19352  	CMPL DX, $0x40
19353  	JBE  four_bytes_remain_standalone
	// Offset needs 32 bits and length > 64: emit 0xff + offset, which
	// accounts for 64 bytes, then handle what is left.
19354  	MOVB $0xff, (AX)
19355  	MOVL CX, 1(AX)
19356  	LEAL -64(DX), DX
19357  	ADDQ $0x05, BX
19358  	ADDQ $0x05, AX
19359  	CMPL DX, $0x04
19360  	JB   four_bytes_remain_standalone
19361  
	// Inlined repeat encoder (same structure as emitRepeat) for the
	// remainder after a 5-byte copy.
19362  	// emitRepeat
19363  emit_repeat_again_standalone_emit_copy:
19364  	MOVL DX, SI // SI keeps the unmodified length for the range checks
19365  	LEAL -4(DX), DX // DX = length-4
19366  	CMPL SI, $0x08
19367  	JBE  repeat_two_standalone_emit_copy
19368  	CMPL SI, $0x0c
19369  	JAE  cant_repeat_two_offset_standalone_emit_copy
19370  	CMPL CX, $0x00000800
19371  	JB   repeat_two_offset_standalone_emit_copy
19372  
19373  cant_repeat_two_offset_standalone_emit_copy:
19374  	CMPL DX, $0x00000104
19375  	JB   repeat_three_standalone_emit_copy
19376  	CMPL DX, $0x00010100
19377  	JB   repeat_four_standalone_emit_copy
19378  	CMPL DX, $0x0100ffff
19379  	JB   repeat_five_standalone_emit_copy
	// Too long for one five-byte repeat: emit 0x1d 0x00 0xfb 0xff 0xff,
	// subtract 16842747 from DX and loop with DX as the length.
19380  	LEAL -16842747(DX), DX
19381  	MOVL $0xfffb001d, (AX)
19382  	MOVB $0xff, 4(AX)
19383  	ADDQ $0x05, AX
19384  	ADDQ $0x05, BX
19385  	JMP  emit_repeat_again_standalone_emit_copy
19386  
// Five bytes: 0x1d, 0x00, then DX-65536 as 24 bits.
19387  repeat_five_standalone_emit_copy:
19388  	LEAL -65536(DX), DX
19389  	MOVL DX, CX // offset is no longer needed; CX becomes scratch
19390  	MOVW $0x001d, (AX)
19391  	MOVW DX, 2(AX)
19392  	SARL $0x10, CX
19393  	MOVB CL, 4(AX)
19394  	ADDQ $0x05, BX
19395  	ADDQ $0x05, AX
19396  	JMP  gen_emit_copy_end
19397  
// Four bytes: 0x19, 0x00, then DX-256 as 16 bits.
19398  repeat_four_standalone_emit_copy:
19399  	LEAL -256(DX), DX
19400  	MOVW $0x0019, (AX)
19401  	MOVW DX, 2(AX)
19402  	ADDQ $0x04, BX
19403  	ADDQ $0x04, AX
19404  	JMP  gen_emit_copy_end
19405  
// Three bytes: 0x15, 0x00, then the low byte of DX-4.
19406  repeat_three_standalone_emit_copy:
19407  	LEAL -4(DX), DX
19408  	MOVW $0x0015, (AX)
19409  	MOVB DL, 2(AX)
19410  	ADDQ $0x03, BX
19411  	ADDQ $0x03, AX
19412  	JMP  gen_emit_copy_end
19413  
// Two bytes: the 16-bit word (DX<<2)|1.
19414  repeat_two_standalone_emit_copy:
19415  	SHLL $0x02, DX
19416  	ORL  $0x01, DX
19417  	MOVW DX, (AX)
19418  	ADDQ $0x02, BX
19419  	ADDQ $0x02, AX
19420  	JMP  gen_emit_copy_end
19421  
// Two bytes with embedded offset: (1 + DX*4) | ((offset>>8)<<5), offset&0xff.
19422  repeat_two_offset_standalone_emit_copy:
19423  	XORQ SI, SI
19424  	LEAL 1(SI)(DX*4), DX // SI is zero, so this is 1 + DX*4
19425  	MOVB CL, 1(AX)
19426  	SARL $0x08, CX
19427  	SHLL $0x05, CX
19428  	ORL  CX, DX
19429  	MOVB DL, (AX)
19430  	ADDQ $0x02, BX
19431  	ADDQ $0x02, AX
19432  	JMP  gen_emit_copy_end
19433  
// Five-byte copy for the remaining DX bytes (nothing when DX == 0):
// byte 0 = DX*4-1, followed by the offset as 32 bits.
19434  four_bytes_remain_standalone:
19435  	TESTL DX, DX
19436  	JZ    gen_emit_copy_end
19437  	XORL  SI, SI
19438  	LEAL  -1(SI)(DX*4), DX // SI is zero, so this is DX*4 - 1
19439  	MOVB  DL, (AX)
19440  	MOVL  CX, 1(AX)
19441  	ADDQ  $0x05, BX
19442  	ADDQ  $0x05, AX
19443  	JMP   gen_emit_copy_end
19444  
// offset < 65536.
19445  two_byte_offset_standalone:
19446  	CMPL DX, $0x40
19447  	JBE  two_byte_offset_short_standalone
19448  	CMPL CX, $0x00000800
19449  	JAE  long_offset_short_standalone
	// length > 64 and offset < 2048: emit a 2-byte copy whose byte 0 is
	// 17 | ((offset>>8)<<5) and take 8 off the length; the remainder is
	// emitted as a repeat.
19450  	MOVL $0x00000001, SI
19451  	LEAL 16(SI), SI // SI = 17
19452  	MOVB CL, 1(AX)
19453  	MOVL CX, DI
19454  	SHRL $0x08, DI
19455  	SHLL $0x05, DI
19456  	ORL  DI, SI
19457  	MOVB SI, (AX)
19458  	ADDQ $0x02, BX
19459  	ADDQ $0x02, AX
19460  	SUBL $0x08, DX
19461  
	// Enter the inlined repeat encoder past its short-form checks, with
	// DX already reduced by 4 as that entry point expects.
19462  	// emitRepeat
19463  	LEAL -4(DX), DX
19464  	JMP  cant_repeat_two_offset_standalone_emit_copy_short_2b
19465  
// Reached only by the jump back from the oversized five-byte case below.
19466  emit_repeat_again_standalone_emit_copy_short_2b:
19467  	MOVL DX, SI
19468  	LEAL -4(DX), DX
19469  	CMPL SI, $0x08
19470  	JBE  repeat_two_standalone_emit_copy_short_2b
19471  	CMPL SI, $0x0c
19472  	JAE  cant_repeat_two_offset_standalone_emit_copy_short_2b
19473  	CMPL CX, $0x00000800
19474  	JB   repeat_two_offset_standalone_emit_copy_short_2b
19475  
19476  cant_repeat_two_offset_standalone_emit_copy_short_2b:
19477  	CMPL DX, $0x00000104
19478  	JB   repeat_three_standalone_emit_copy_short_2b
19479  	CMPL DX, $0x00010100
19480  	JB   repeat_four_standalone_emit_copy_short_2b
19481  	CMPL DX, $0x0100ffff
19482  	JB   repeat_five_standalone_emit_copy_short_2b
19483  	LEAL -16842747(DX), DX
19484  	MOVL $0xfffb001d, (AX)
19485  	MOVB $0xff, 4(AX)
19486  	ADDQ $0x05, AX
19487  	ADDQ $0x05, BX
19488  	JMP  emit_repeat_again_standalone_emit_copy_short_2b
19489  
// Five bytes: 0x1d, 0x00, then DX-65536 as 24 bits.
19490  repeat_five_standalone_emit_copy_short_2b:
19491  	LEAL -65536(DX), DX
19492  	MOVL DX, CX // offset is no longer needed; CX becomes scratch
19493  	MOVW $0x001d, (AX)
19494  	MOVW DX, 2(AX)
19495  	SARL $0x10, CX
19496  	MOVB CL, 4(AX)
19497  	ADDQ $0x05, BX
19498  	ADDQ $0x05, AX
19499  	JMP  gen_emit_copy_end
19500  
// Four bytes: 0x19, 0x00, then DX-256 as 16 bits.
19501  repeat_four_standalone_emit_copy_short_2b:
19502  	LEAL -256(DX), DX
19503  	MOVW $0x0019, (AX)
19504  	MOVW DX, 2(AX)
19505  	ADDQ $0x04, BX
19506  	ADDQ $0x04, AX
19507  	JMP  gen_emit_copy_end
19508  
// Three bytes: 0x15, 0x00, then the low byte of DX-4.
19509  repeat_three_standalone_emit_copy_short_2b:
19510  	LEAL -4(DX), DX
19511  	MOVW $0x0015, (AX)
19512  	MOVB DL, 2(AX)
19513  	ADDQ $0x03, BX
19514  	ADDQ $0x03, AX
19515  	JMP  gen_emit_copy_end
19516  
// Two bytes: the 16-bit word (DX<<2)|1.
19517  repeat_two_standalone_emit_copy_short_2b:
19518  	SHLL $0x02, DX
19519  	ORL  $0x01, DX
19520  	MOVW DX, (AX)
19521  	ADDQ $0x02, BX
19522  	ADDQ $0x02, AX
19523  	JMP  gen_emit_copy_end
19524  
// Two bytes with embedded offset: (1 + DX*4) | ((offset>>8)<<5), offset&0xff.
19525  repeat_two_offset_standalone_emit_copy_short_2b:
19526  	XORQ SI, SI
19527  	LEAL 1(SI)(DX*4), DX // SI is zero, so this is 1 + DX*4
19528  	MOVB CL, 1(AX)
19529  	SARL $0x08, CX
19530  	SHLL $0x05, CX
19531  	ORL  CX, DX
19532  	MOVB DL, (AX)
19533  	ADDQ $0x02, BX
19534  	ADDQ $0x02, AX
19535  	JMP  gen_emit_copy_end
19536  
// length > 64 and 2048 <= offset < 65536: emit the 3-byte copy 0xee + 16-bit
// offset (0xee = 60*4-2, i.e. the three-byte form for length 60), take 60 off
// the length and emit the remainder as a repeat.
19537  long_offset_short_standalone:
19538  	MOVB $0xee, (AX)
19539  	MOVW CX, 1(AX)
19540  	LEAL -60(DX), DX
19541  	ADDQ $0x03, AX
19542  	ADDQ $0x03, BX
19543  
19544  	// emitRepeat
19545  emit_repeat_again_standalone_emit_copy_short:
19546  	MOVL DX, SI // SI keeps the unmodified length for the range checks
19547  	LEAL -4(DX), DX // DX = length-4
19548  	CMPL SI, $0x08
19549  	JBE  repeat_two_standalone_emit_copy_short
19550  	CMPL SI, $0x0c
19551  	JAE  cant_repeat_two_offset_standalone_emit_copy_short
19552  	CMPL CX, $0x00000800
19553  	JB   repeat_two_offset_standalone_emit_copy_short
19554  
19555  cant_repeat_two_offset_standalone_emit_copy_short:
19556  	CMPL DX, $0x00000104
19557  	JB   repeat_three_standalone_emit_copy_short
19558  	CMPL DX, $0x00010100
19559  	JB   repeat_four_standalone_emit_copy_short
19560  	CMPL DX, $0x0100ffff
19561  	JB   repeat_five_standalone_emit_copy_short
19562  	LEAL -16842747(DX), DX
19563  	MOVL $0xfffb001d, (AX)
19564  	MOVB $0xff, 4(AX)
19565  	ADDQ $0x05, AX
19566  	ADDQ $0x05, BX
19567  	JMP  emit_repeat_again_standalone_emit_copy_short
19568  
// Five bytes: 0x1d, 0x00, then DX-65536 as 24 bits.
19569  repeat_five_standalone_emit_copy_short:
19570  	LEAL -65536(DX), DX
19571  	MOVL DX, CX // offset is no longer needed; CX becomes scratch
19572  	MOVW $0x001d, (AX)
19573  	MOVW DX, 2(AX)
19574  	SARL $0x10, CX
19575  	MOVB CL, 4(AX)
19576  	ADDQ $0x05, BX
19577  	ADDQ $0x05, AX
19578  	JMP  gen_emit_copy_end
19579  
// Four bytes: 0x19, 0x00, then DX-256 as 16 bits.
19580  repeat_four_standalone_emit_copy_short:
19581  	LEAL -256(DX), DX
19582  	MOVW $0x0019, (AX)
19583  	MOVW DX, 2(AX)
19584  	ADDQ $0x04, BX
19585  	ADDQ $0x04, AX
19586  	JMP  gen_emit_copy_end
19587  
// Three bytes: 0x15, 0x00, then the low byte of DX-4.
19588  repeat_three_standalone_emit_copy_short:
19589  	LEAL -4(DX), DX
19590  	MOVW $0x0015, (AX)
19591  	MOVB DL, 2(AX)
19592  	ADDQ $0x03, BX
19593  	ADDQ $0x03, AX
19594  	JMP  gen_emit_copy_end
19595  
// Two bytes: the 16-bit word (DX<<2)|1.
19596  repeat_two_standalone_emit_copy_short:
19597  	SHLL $0x02, DX
19598  	ORL  $0x01, DX
19599  	MOVW DX, (AX)
19600  	ADDQ $0x02, BX
19601  	ADDQ $0x02, AX
19602  	JMP  gen_emit_copy_end
19603  
// Two bytes with embedded offset: (1 + DX*4) | ((offset>>8)<<5), offset&0xff.
19604  repeat_two_offset_standalone_emit_copy_short:
19605  	XORQ SI, SI
19606  	LEAL 1(SI)(DX*4), DX // SI is zero, so this is 1 + DX*4
19607  	MOVB CL, 1(AX)
19608  	SARL $0x08, CX
19609  	SHLL $0x05, CX
19610  	ORL  CX, DX
19611  	MOVB DL, (AX)
19612  	ADDQ $0x02, BX
19613  	ADDQ $0x02, AX
19614  	JMP  gen_emit_copy_end
19615  
// offset < 65536 and length <= 64: a single copy. The 2-byte form is used
// only when length < 12 and offset < 2048; everything else takes 3 bytes.
19616  two_byte_offset_short_standalone:
19617  	MOVL DX, SI
19618  	SHLL $0x02, SI // SI = length*4
19619  	CMPL DX, $0x0c
19620  	JAE  emit_copy_three_standalone
19621  	CMPL CX, $0x00000800
19622  	JAE  emit_copy_three_standalone
19623  	LEAL -15(SI), SI // length*4 - 15 == ((length-4)<<2) + 1
19624  	MOVB CL, 1(AX)
19625  	SHRL $0x08, CX
19626  	SHLL $0x05, CX
19627  	ORL  CX, SI
19628  	MOVB SI, (AX)
19629  	ADDQ $0x02, BX
19630  	ADDQ $0x02, AX
19631  	JMP  gen_emit_copy_end
19632  
// Three bytes: length*4-2, then the offset as 16 bits.
19633  emit_copy_three_standalone:
19634  	LEAL -2(SI), SI
19635  	MOVB SI, (AX)
19636  	MOVW CX, 1(AX)
19637  	ADDQ $0x03, BX
19638  	ADDQ $0x03, AX
19639  
19640  gen_emit_copy_end:
19641  	MOVQ BX, ret+40(FP)
19642  	RET
19643  
19644  // func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// emitCopyNoRepeat writes one or more copy operations for (offset, length)
// at dst and returns the number of bytes written. Unlike emitCopy it never
// emits a repeat operation: lengths above 64 are split into several copies.
//
// Paths (32-bit arithmetic; multi-byte values are stored little-endian):
//   offset >= 65536 : while length > 64, emit 0xff + 32-bit offset (covers 64);
//                     then, if anything remains, emit length*4-1 + 32-bit offset
//   offset < 65536  : while length > 64, emit 0xee + 16-bit offset (covers 60);
//                     then emit 2 bytes when length < 12 and offset < 2048:
//                     (length*4-15) | ((offset>>8)<<5), offset&0xff;
//                     otherwise 3 bytes: length*4-2, 16-bit offset
//
// dst_len is never read and no bounds check is performed here.
// NOTE(review): presumably the caller guarantees dst is large enough — confirm.
//
// Register roles:
//   AX = write cursor in dst   BX = bytes written (return value)
//   CX = offset                DX = remaining length
19645  TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
19646  	XORQ BX, BX
19647  	MOVQ dst_base+0(FP), AX
19648  	MOVQ offset+24(FP), CX
19649  	MOVQ length+32(FP), DX
19650  
19651  	// emitCopy
19652  	CMPL CX, $0x00010000
19653  	JB   two_byte_offset_standalone_snappy
19654  
// offset >= 65536: peel off 64-byte copies while more than 64 bytes remain.
19655  four_bytes_loop_back_standalone_snappy:
19656  	CMPL DX, $0x40
19657  	JBE  four_bytes_remain_standalone_snappy
19658  	MOVB $0xff, (AX)
19659  	MOVL CX, 1(AX)
19660  	LEAL -64(DX), DX
19661  	ADDQ $0x05, BX
19662  	ADDQ $0x05, AX
19663  	CMPL DX, $0x04
19664  	JB   four_bytes_remain_standalone_snappy
19665  	JMP  four_bytes_loop_back_standalone_snappy
19666  
// Final five-byte copy for the remaining DX bytes (nothing when DX == 0):
// byte 0 = DX*4-1, followed by the offset as 32 bits.
19667  four_bytes_remain_standalone_snappy:
19668  	TESTL DX, DX
19669  	JZ    gen_emit_copy_end_snappy
19670  	XORL  SI, SI
19671  	LEAL  -1(SI)(DX*4), DX // SI is zero, so this is DX*4 - 1
19672  	MOVB  DL, (AX)
19673  	MOVL  CX, 1(AX)
19674  	ADDQ  $0x05, BX
19675  	ADDQ  $0x05, AX
19676  	JMP   gen_emit_copy_end_snappy
19677  
// offset < 65536: peel off 60-byte copies (0xee = 60*4-2) while more than
// 64 bytes remain.
19678  two_byte_offset_standalone_snappy:
19679  	CMPL DX, $0x40
19680  	JBE  two_byte_offset_short_standalone_snappy
19681  	MOVB $0xee, (AX)
19682  	MOVW CX, 1(AX)
19683  	LEAL -60(DX), DX
19684  	ADDQ $0x03, AX
19685  	ADDQ $0x03, BX
19686  	JMP  two_byte_offset_standalone_snappy
19687  
// Final copy of at most 64 bytes. The 2-byte form is used only when
// length < 12 and offset < 2048; everything else takes 3 bytes.
19688  two_byte_offset_short_standalone_snappy:
19689  	MOVL DX, SI
19690  	SHLL $0x02, SI // SI = length*4
19691  	CMPL DX, $0x0c
19692  	JAE  emit_copy_three_standalone_snappy
19693  	CMPL CX, $0x00000800
19694  	JAE  emit_copy_three_standalone_snappy
19695  	LEAL -15(SI), SI // length*4 - 15 == ((length-4)<<2) + 1
19696  	MOVB CL, 1(AX)
19697  	SHRL $0x08, CX
19698  	SHLL $0x05, CX
19699  	ORL  CX, SI
19700  	MOVB SI, (AX)
19701  	ADDQ $0x02, BX
19702  	ADDQ $0x02, AX
19703  	JMP  gen_emit_copy_end_snappy
19704  
// Three bytes: length*4-2, then the offset as 16 bits.
19705  emit_copy_three_standalone_snappy:
19706  	LEAL -2(SI), SI
19707  	MOVB SI, (AX)
19708  	MOVW CX, 1(AX)
19709  	ADDQ $0x03, BX
19710  	ADDQ $0x03, AX
19711  
19712  gen_emit_copy_end_snappy:
19713  	MOVQ BX, ret+40(FP)
19714  	RET
19715  
19716  // func matchLen(a []byte, b []byte) int
19717  // Requires: BMI
//
// matchLen returns the number of leading bytes of a that equal the bytes at
// the same positions in b. At most len(a) bytes are examined; b_len is never
// read, and the length comparisons use only the low 32 bits of a_len.
// NOTE(review): presumably the caller guarantees len(b) >= len(a) — confirm.
//
// TZCNTQ (the BMI requirement) is only assembled when GOAMD64_v3 is defined;
// otherwise BSFQ is used.
//
// Register roles:
//   AX = a base   CX = b base   DX = bytes of a still to compare
//   SI = bytes matched so far (return value)   BX, DI = scratch
19718  TEXT ·matchLen(SB), NOSPLIT, $0-56
19719  	MOVQ a_base+0(FP), AX
19720  	MOVQ b_base+24(FP), CX
19721  	MOVQ a_len+8(FP), DX
19722  
19723  	// matchLen
19724  	XORL SI, SI
19725  
// Compare 16 bytes per iteration as two 8-byte words. XOR of the two sides is
// non-zero exactly when some byte differs.
19726  matchlen_loopback_16_standalone:
19727  	CMPL DX, $0x10
19728  	JB   matchlen_match8_standalone
19729  	MOVQ (AX)(SI*1), BX
19730  	MOVQ 8(AX)(SI*1), DI
19731  	XORQ (CX)(SI*1), BX
19732  	JNZ  matchlen_bsf_8_standalone
19733  	XORQ 8(CX)(SI*1), DI
19734  	JNZ  matchlen_bsf_16standalone
19735  	LEAL -16(DX), DX
19736  	LEAL 16(SI), SI
19737  	JMP  matchlen_loopback_16_standalone
19738  
// Mismatch in the second word: index of the lowest set bit divided by 8 is
// the number of equal bytes in that word (loads are little-endian); add the
// 8 bytes of the first word.
19739  matchlen_bsf_16standalone:
19740  #ifdef GOAMD64_v3
19741  	TZCNTQ DI, DI
19742  
19743  #else
19744  	BSFQ DI, DI
19745  
19746  #endif
19747  	SARQ $0x03, DI
19748  	LEAL 8(SI)(DI*1), SI
19749  	JMP  gen_match_len_end
19750  
// Fewer than 16 bytes left: try one 8-byte word.
19751  matchlen_match8_standalone:
19752  	CMPL DX, $0x08
19753  	JB   matchlen_match4_standalone
19754  	MOVQ (AX)(SI*1), BX
19755  	XORQ (CX)(SI*1), BX
19756  	JNZ  matchlen_bsf_8_standalone
19757  	LEAL -8(DX), DX
19758  	LEAL 8(SI), SI
19759  	JMP  matchlen_match4_standalone
19760  
// Mismatch in the word held in BX: add the number of equal low-order bytes.
19761  matchlen_bsf_8_standalone:
19762  #ifdef GOAMD64_v3
19763  	TZCNTQ BX, BX
19764  
19765  #else
19766  	BSFQ BX, BX
19767  
19768  #endif
19769  	SARQ $0x03, BX
19770  	LEAL (SI)(BX*1), SI
19771  	JMP  gen_match_len_end
19772  
// Try 4 bytes. On a mismatch DX is left unchanged and the 2- and 1-byte
// steps below narrow down the position.
19773  matchlen_match4_standalone:
19774  	CMPL DX, $0x04
19775  	JB   matchlen_match2_standalone
19776  	MOVL (AX)(SI*1), BX
19777  	CMPL (CX)(SI*1), BX
19778  	JNE  matchlen_match2_standalone
19779  	LEAL -4(DX), DX
19780  	LEAL 4(SI), SI
19781  
// Try 2 bytes: exactly one byte left goes straight to the 1-byte step, zero
// bytes left ends the search.
19782  matchlen_match2_standalone:
19783  	CMPL DX, $0x01
19784  	JE   matchlen_match1_standalone
19785  	JB   gen_match_len_end
19786  	MOVW (AX)(SI*1), BX
19787  	CMPW (CX)(SI*1), BX
19788  	JNE  matchlen_match1_standalone
19789  	LEAL 2(SI), SI
19790  	SUBL $0x02, DX
19791  	JZ   gen_match_len_end
19792  
// Final single-byte comparison.
19793  matchlen_match1_standalone:
19794  	MOVB (AX)(SI*1), BL
19795  	CMPB (CX)(SI*1), BL
19796  	JNE  gen_match_len_end
19797  	LEAL 1(SI), SI
19798  
19799  gen_match_len_end:
19800  	MOVQ SI, ret+48(FP)
19801  	RET
19802  
19803  // func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
19804  // Requires: SSE2
19805  TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64
19806  	XORQ SI, SI
19807  	MOVQ dst_base+0(FP), AX
19808  	MOVQ dst_len+8(FP), CX
19809  	MOVQ src_base+24(FP), DX
19810  	MOVQ src_len+32(FP), BX
19811  	LEAQ (DX)(BX*1), BX
19812  	LEAQ -8(AX)(CX*1), CX
19813  	XORQ DI, DI
19814  
19815  lz4_s2_loop:
19816  	CMPQ    DX, BX
19817  	JAE     lz4_s2_corrupt
19818  	CMPQ    AX, CX
19819  	JAE     lz4_s2_dstfull
19820  	MOVBQZX (DX), R8
19821  	MOVQ    R8, R9
19822  	MOVQ    R8, R10
19823  	SHRQ    $0x04, R9
19824  	ANDQ    $0x0f, R10
19825  	CMPQ    R8, $0xf0
19826  	JB      lz4_s2_ll_end
19827  
19828  lz4_s2_ll_loop:
19829  	INCQ    DX
19830  	CMPQ    DX, BX
19831  	JAE     lz4_s2_corrupt
19832  	MOVBQZX (DX), R8
19833  	ADDQ    R8, R9
19834  	CMPQ    R8, $0xff
19835  	JEQ     lz4_s2_ll_loop
19836  
19837  lz4_s2_ll_end:
19838  	LEAQ  (DX)(R9*1), R8
19839  	ADDQ  $0x04, R10
19840  	CMPQ  R8, BX
19841  	JAE   lz4_s2_corrupt
19842  	INCQ  DX
19843  	INCQ  R8
19844  	TESTQ R9, R9
19845  	JZ    lz4_s2_lits_done
19846  	LEAQ  (AX)(R9*1), R11
19847  	CMPQ  R11, CX
19848  	JAE   lz4_s2_dstfull
19849  	ADDQ  R9, SI
19850  	LEAL  -1(R9), R11
19851  	CMPL  R11, $0x3c
19852  	JB    one_byte_lz4_s2
19853  	CMPL  R11, $0x00000100
19854  	JB    two_bytes_lz4_s2
19855  	CMPL  R11, $0x00010000
19856  	JB    three_bytes_lz4_s2
19857  	CMPL  R11, $0x01000000
19858  	JB    four_bytes_lz4_s2
19859  	MOVB  $0xfc, (AX)
19860  	MOVL  R11, 1(AX)
19861  	ADDQ  $0x05, AX
19862  	JMP   memmove_long_lz4_s2
19863  
19864  four_bytes_lz4_s2:
19865  	MOVL R11, R12
19866  	SHRL $0x10, R12
19867  	MOVB $0xf8, (AX)
19868  	MOVW R11, 1(AX)
19869  	MOVB R12, 3(AX)
19870  	ADDQ $0x04, AX
19871  	JMP  memmove_long_lz4_s2
19872  
19873  three_bytes_lz4_s2:
19874  	MOVB $0xf4, (AX)
19875  	MOVW R11, 1(AX)
19876  	ADDQ $0x03, AX
19877  	JMP  memmove_long_lz4_s2
19878  
19879  two_bytes_lz4_s2:
19880  	MOVB $0xf0, (AX)
19881  	MOVB R11, 1(AX)
19882  	ADDQ $0x02, AX
19883  	CMPL R11, $0x40
19884  	JB   memmove_lz4_s2
19885  	JMP  memmove_long_lz4_s2
19886  
19887  one_byte_lz4_s2:
19888  	SHLB $0x02, R11
19889  	MOVB R11, (AX)
19890  	ADDQ $0x01, AX
19891  
19892  memmove_lz4_s2:
19893  	LEAQ (AX)(R9*1), R11
19894  
19895  	// genMemMoveShort
19896  	CMPQ R9, $0x08
19897  	JBE  emit_lit_memmove_lz4_s2_memmove_move_8
19898  	CMPQ R9, $0x10
19899  	JBE  emit_lit_memmove_lz4_s2_memmove_move_8through16
19900  	CMPQ R9, $0x20
19901  	JBE  emit_lit_memmove_lz4_s2_memmove_move_17through32
19902  	JMP  emit_lit_memmove_lz4_s2_memmove_move_33through64
19903  
19904  emit_lit_memmove_lz4_s2_memmove_move_8:
19905  	MOVQ (DX), R12
19906  	MOVQ R12, (AX)
19907  	JMP  memmove_end_copy_lz4_s2
19908  
19909  emit_lit_memmove_lz4_s2_memmove_move_8through16:
19910  	MOVQ (DX), R12
19911  	MOVQ -8(DX)(R9*1), DX
19912  	MOVQ R12, (AX)
19913  	MOVQ DX, -8(AX)(R9*1)
19914  	JMP  memmove_end_copy_lz4_s2
19915  
19916  emit_lit_memmove_lz4_s2_memmove_move_17through32:
19917  	MOVOU (DX), X0
19918  	MOVOU -16(DX)(R9*1), X1
19919  	MOVOU X0, (AX)
19920  	MOVOU X1, -16(AX)(R9*1)
19921  	JMP   memmove_end_copy_lz4_s2
19922  
19923  emit_lit_memmove_lz4_s2_memmove_move_33through64:
19924  	MOVOU (DX), X0
19925  	MOVOU 16(DX), X1
19926  	MOVOU -32(DX)(R9*1), X2
19927  	MOVOU -16(DX)(R9*1), X3
19928  	MOVOU X0, (AX)
19929  	MOVOU X1, 16(AX)
19930  	MOVOU X2, -32(AX)(R9*1)
19931  	MOVOU X3, -16(AX)(R9*1)
19932  
19933  memmove_end_copy_lz4_s2:
19934  	MOVQ R11, AX
19935  	JMP  lz4_s2_lits_emit_done
19936  
19937  memmove_long_lz4_s2:
19938  	LEAQ (AX)(R9*1), R11
19939  
19940  	// genMemMoveLong
19941  	MOVOU (DX), X0
19942  	MOVOU 16(DX), X1
19943  	MOVOU -32(DX)(R9*1), X2
19944  	MOVOU -16(DX)(R9*1), X3
19945  	MOVQ  R9, R13
19946  	SHRQ  $0x05, R13
19947  	MOVQ  AX, R12
19948  	ANDL  $0x0000001f, R12
19949  	MOVQ  $0x00000040, R14
19950  	SUBQ  R12, R14
19951  	DECQ  R13
19952  	JA    emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
19953  	LEAQ  -32(DX)(R14*1), R12
19954  	LEAQ  -32(AX)(R14*1), R15
19955  
19956  emit_lit_memmove_long_lz4_s2large_big_loop_back:
19957  	MOVOU (R12), X4
19958  	MOVOU 16(R12), X5
19959  	MOVOA X4, (R15)
19960  	MOVOA X5, 16(R15)
19961  	ADDQ  $0x20, R15
19962  	ADDQ  $0x20, R12
19963  	ADDQ  $0x20, R14
19964  	DECQ  R13
19965  	JNA   emit_lit_memmove_long_lz4_s2large_big_loop_back
19966  
19967  emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32:
19968  	MOVOU -32(DX)(R14*1), X4
19969  	MOVOU -16(DX)(R14*1), X5
19970  	MOVOA X4, -32(AX)(R14*1)
19971  	MOVOA X5, -16(AX)(R14*1)
19972  	ADDQ  $0x20, R14
19973  	CMPQ  R9, R14
19974  	JAE   emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
19975  	MOVOU X0, (AX)
19976  	MOVOU X1, 16(AX)
19977  	MOVOU X2, -32(AX)(R9*1)
19978  	MOVOU X3, -16(AX)(R9*1)
19979  	MOVQ  R11, AX
19980  
19981  lz4_s2_lits_emit_done:
19982  	MOVQ R8, DX
19983  
19984  lz4_s2_lits_done:
19985  	CMPQ DX, BX
19986  	JNE  lz4_s2_match
19987  	CMPQ R10, $0x04
19988  	JEQ  lz4_s2_done
19989  	JMP  lz4_s2_corrupt
19990  
19991  lz4_s2_match:
19992  	LEAQ    2(DX), R8
19993  	CMPQ    R8, BX
19994  	JAE     lz4_s2_corrupt
19995  	MOVWQZX (DX), R9
19996  	MOVQ    R8, DX
19997  	TESTQ   R9, R9
19998  	JZ      lz4_s2_corrupt
19999  	CMPQ    R9, SI
20000  	JA      lz4_s2_corrupt
20001  	CMPQ    R10, $0x13
20002  	JNE     lz4_s2_ml_done
20003  
20004  lz4_s2_ml_loop:
20005  	MOVBQZX (DX), R8
20006  	INCQ    DX
20007  	ADDQ    R8, R10
20008  	CMPQ    DX, BX
20009  	JAE     lz4_s2_corrupt
20010  	CMPQ    R8, $0xff
20011  	JEQ     lz4_s2_ml_loop
20012  
20013  lz4_s2_ml_done:
20014  	ADDQ R10, SI
20015  	CMPQ R9, DI
20016  	JNE  lz4_s2_docopy
20017  
20018  	// emitRepeat
20019  emit_repeat_again_lz4_s2:
20020  	MOVL R10, R8
20021  	LEAL -4(R10), R10
20022  	CMPL R8, $0x08
20023  	JBE  repeat_two_lz4_s2
20024  	CMPL R8, $0x0c
20025  	JAE  cant_repeat_two_offset_lz4_s2
20026  	CMPL R9, $0x00000800
20027  	JB   repeat_two_offset_lz4_s2
20028  
20029  cant_repeat_two_offset_lz4_s2:
20030  	CMPL R10, $0x00000104
20031  	JB   repeat_three_lz4_s2
20032  	CMPL R10, $0x00010100
20033  	JB   repeat_four_lz4_s2
20034  	CMPL R10, $0x0100ffff
20035  	JB   repeat_five_lz4_s2
20036  	LEAL -16842747(R10), R10
20037  	MOVL $0xfffb001d, (AX)
20038  	MOVB $0xff, 4(AX)
20039  	ADDQ $0x05, AX
20040  	JMP  emit_repeat_again_lz4_s2
20041  
20042  repeat_five_lz4_s2:
20043  	LEAL -65536(R10), R10
20044  	MOVL R10, R9
20045  	MOVW $0x001d, (AX)
20046  	MOVW R10, 2(AX)
20047  	SARL $0x10, R9
20048  	MOVB R9, 4(AX)
20049  	ADDQ $0x05, AX
20050  	JMP  lz4_s2_loop
20051  
20052  repeat_four_lz4_s2:
20053  	LEAL -256(R10), R10
20054  	MOVW $0x0019, (AX)
20055  	MOVW R10, 2(AX)
20056  	ADDQ $0x04, AX
20057  	JMP  lz4_s2_loop
20058  
20059  repeat_three_lz4_s2:
20060  	LEAL -4(R10), R10
20061  	MOVW $0x0015, (AX)
20062  	MOVB R10, 2(AX)
20063  	ADDQ $0x03, AX
20064  	JMP  lz4_s2_loop
20065  
20066  repeat_two_lz4_s2:
20067  	SHLL $0x02, R10
20068  	ORL  $0x01, R10
20069  	MOVW R10, (AX)
20070  	ADDQ $0x02, AX
20071  	JMP  lz4_s2_loop
20072  
20073  repeat_two_offset_lz4_s2:
20074  	XORQ R8, R8
20075  	LEAL 1(R8)(R10*4), R10
20076  	MOVB R9, 1(AX)
20077  	SARL $0x08, R9
20078  	SHLL $0x05, R9
20079  	ORL  R9, R10
20080  	MOVB R10, (AX)
20081  	ADDQ $0x02, AX
20082  	JMP  lz4_s2_loop
20083  
20084  lz4_s2_docopy:
20085  	MOVQ R9, DI
20086  
20087  	// emitCopy
20088  	CMPL R10, $0x40
20089  	JBE  two_byte_offset_short_lz4_s2
20090  	CMPL R9, $0x00000800
20091  	JAE  long_offset_short_lz4_s2
20092  	MOVL $0x00000001, R8
20093  	LEAL 16(R8), R8
20094  	MOVB R9, 1(AX)
20095  	MOVL R9, R11
20096  	SHRL $0x08, R11
20097  	SHLL $0x05, R11
20098  	ORL  R11, R8
20099  	MOVB R8, (AX)
20100  	ADDQ $0x02, AX
20101  	SUBL $0x08, R10
20102  
20103  	// emitRepeat
20104  	LEAL -4(R10), R10
20105  	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
20106  
20107  emit_repeat_again_lz4_s2_emit_copy_short_2b:
20108  	MOVL R10, R8
20109  	LEAL -4(R10), R10
20110  	CMPL R8, $0x08
20111  	JBE  repeat_two_lz4_s2_emit_copy_short_2b
20112  	CMPL R8, $0x0c
20113  	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
20114  	CMPL R9, $0x00000800
20115  	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
20116  
20117  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
20118  	CMPL R10, $0x00000104
20119  	JB   repeat_three_lz4_s2_emit_copy_short_2b
20120  	CMPL R10, $0x00010100
20121  	JB   repeat_four_lz4_s2_emit_copy_short_2b
20122  	CMPL R10, $0x0100ffff
20123  	JB   repeat_five_lz4_s2_emit_copy_short_2b
20124  	LEAL -16842747(R10), R10
20125  	MOVL $0xfffb001d, (AX)
20126  	MOVB $0xff, 4(AX)
20127  	ADDQ $0x05, AX
20128  	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
20129  
20130  repeat_five_lz4_s2_emit_copy_short_2b:
20131  	LEAL -65536(R10), R10
20132  	MOVL R10, R9
20133  	MOVW $0x001d, (AX)
20134  	MOVW R10, 2(AX)
20135  	SARL $0x10, R9
20136  	MOVB R9, 4(AX)
20137  	ADDQ $0x05, AX
20138  	JMP  lz4_s2_loop
20139  
20140  repeat_four_lz4_s2_emit_copy_short_2b:
20141  	LEAL -256(R10), R10
20142  	MOVW $0x0019, (AX)
20143  	MOVW R10, 2(AX)
20144  	ADDQ $0x04, AX
20145  	JMP  lz4_s2_loop
20146  
20147  repeat_three_lz4_s2_emit_copy_short_2b:
20148  	LEAL -4(R10), R10
20149  	MOVW $0x0015, (AX)
20150  	MOVB R10, 2(AX)
20151  	ADDQ $0x03, AX
20152  	JMP  lz4_s2_loop
20153  
20154  repeat_two_lz4_s2_emit_copy_short_2b:
20155  	SHLL $0x02, R10
20156  	ORL  $0x01, R10
20157  	MOVW R10, (AX)
20158  	ADDQ $0x02, AX
20159  	JMP  lz4_s2_loop
20160  
20161  repeat_two_offset_lz4_s2_emit_copy_short_2b:
20162  	XORQ R8, R8
20163  	LEAL 1(R8)(R10*4), R10
20164  	MOVB R9, 1(AX)
20165  	SARL $0x08, R9
20166  	SHLL $0x05, R9
20167  	ORL  R9, R10
20168  	MOVB R10, (AX)
20169  	ADDQ $0x02, AX
20170  	JMP  lz4_s2_loop
20171  
20172  long_offset_short_lz4_s2:
20173  	MOVB $0xee, (AX)
20174  	MOVW R9, 1(AX)
20175  	LEAL -60(R10), R10
20176  	ADDQ $0x03, AX
20177  
20178  	// emitRepeat
20179  emit_repeat_again_lz4_s2_emit_copy_short:
20180  	MOVL R10, R8
20181  	LEAL -4(R10), R10
20182  	CMPL R8, $0x08
20183  	JBE  repeat_two_lz4_s2_emit_copy_short
20184  	CMPL R8, $0x0c
20185  	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
20186  	CMPL R9, $0x00000800
20187  	JB   repeat_two_offset_lz4_s2_emit_copy_short
20188  
20189  cant_repeat_two_offset_lz4_s2_emit_copy_short:
20190  	CMPL R10, $0x00000104
20191  	JB   repeat_three_lz4_s2_emit_copy_short
20192  	CMPL R10, $0x00010100
20193  	JB   repeat_four_lz4_s2_emit_copy_short
20194  	CMPL R10, $0x0100ffff
20195  	JB   repeat_five_lz4_s2_emit_copy_short
20196  	LEAL -16842747(R10), R10
20197  	MOVL $0xfffb001d, (AX)
20198  	MOVB $0xff, 4(AX)
20199  	ADDQ $0x05, AX
20200  	JMP  emit_repeat_again_lz4_s2_emit_copy_short
20201  
20202  repeat_five_lz4_s2_emit_copy_short:
20203  	LEAL -65536(R10), R10
20204  	MOVL R10, R9
20205  	MOVW $0x001d, (AX)
20206  	MOVW R10, 2(AX)
20207  	SARL $0x10, R9
20208  	MOVB R9, 4(AX)
20209  	ADDQ $0x05, AX
20210  	JMP  lz4_s2_loop
20211  
20212  repeat_four_lz4_s2_emit_copy_short:
20213  	LEAL -256(R10), R10
20214  	MOVW $0x0019, (AX)
20215  	MOVW R10, 2(AX)
20216  	ADDQ $0x04, AX
20217  	JMP  lz4_s2_loop
20218  
20219  repeat_three_lz4_s2_emit_copy_short:
20220  	LEAL -4(R10), R10
20221  	MOVW $0x0015, (AX)
20222  	MOVB R10, 2(AX)
20223  	ADDQ $0x03, AX
20224  	JMP  lz4_s2_loop
20225  
20226  repeat_two_lz4_s2_emit_copy_short:
20227  	SHLL $0x02, R10
20228  	ORL  $0x01, R10
20229  	MOVW R10, (AX)
20230  	ADDQ $0x02, AX
20231  	JMP  lz4_s2_loop
20232  
20233  repeat_two_offset_lz4_s2_emit_copy_short:
20234  	XORQ R8, R8
20235  	LEAL 1(R8)(R10*4), R10
20236  	MOVB R9, 1(AX)
20237  	SARL $0x08, R9
20238  	SHLL $0x05, R9
20239  	ORL  R9, R10
20240  	MOVB R10, (AX)
20241  	ADDQ $0x02, AX
20242  	JMP  lz4_s2_loop
20243  
20244  two_byte_offset_short_lz4_s2:
20245  	MOVL R10, R8
20246  	SHLL $0x02, R8
20247  	CMPL R10, $0x0c
20248  	JAE  emit_copy_three_lz4_s2
20249  	CMPL R9, $0x00000800
20250  	JAE  emit_copy_three_lz4_s2
20251  	LEAL -15(R8), R8
20252  	MOVB R9, 1(AX)
20253  	SHRL $0x08, R9
20254  	SHLL $0x05, R9
20255  	ORL  R9, R8
20256  	MOVB R8, (AX)
20257  	ADDQ $0x02, AX
20258  	JMP  lz4_s2_loop
20259  
20260  emit_copy_three_lz4_s2:
20261  	LEAL -2(R8), R8
20262  	MOVB R8, (AX)
20263  	MOVW R9, 1(AX)
20264  	ADDQ $0x03, AX
20265  	JMP  lz4_s2_loop
20266  
20267  lz4_s2_done:
20268  	MOVQ dst_base+0(FP), CX
20269  	SUBQ CX, AX
20270  	MOVQ SI, uncompressed+48(FP)
20271  	MOVQ AX, dstUsed+56(FP)
20272  	RET
20273  
20274  lz4_s2_corrupt:
20275  	XORQ AX, AX
20276  	LEAQ -1(AX), SI
20277  	MOVQ SI, uncompressed+48(FP)
20278  	RET
20279  
20280  lz4_s2_dstfull:
20281  	XORQ AX, AX
20282  	LEAQ -2(AX), SI
20283  	MOVQ SI, uncompressed+48(FP)
20284  	RET
20285  
20286  // func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
20287  // Requires: SSE2
       //
       // Walks the token stream in src and writes re-encoded literal, copy and
       // repeat operations to dst. Each token byte carries a literal count in its
       // high nibble and a match length code in its low nibble (match length =
       // code + 3). A nibble of 15 is extended by the following bytes, which are
       // added until one is not 0xff. A match length code of 0 means the token has
       // no match and no offset follows.
       //
       // Results:
       //   uncompressed: sum of all literal counts and match lengths on success;
       //                 -1 when src is rejected (lz4s_s2_corrupt);
       //                 -2 when dst has too little room (lz4s_s2_dstfull).
       //   dstUsed:      bytes written to dst. Only stored on the success path.
       //
       // Register roles:
       //   AX  dst write pointer          CX  dst_base + dst_len - 8 (write limit)
       //   DX  src read pointer           BX  src_base + src_len (src end)
       //   SI  uncompressed byte count    DI  offset of the previous match
       //   R9  literal count, later the match offset
       //   R10 match length               R8, R11-R15, X0-X5 scratch
20288  TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64
20289  	XORQ SI, SI // SI = 0 bytes accounted for so far
20290  	MOVQ dst_base+0(FP), AX
20291  	MOVQ dst_len+8(FP), CX
20292  	MOVQ src_base+24(FP), DX
20293  	MOVQ src_len+32(FP), BX
20294  	LEAQ (DX)(BX*1), BX // BX = one past the last src byte
20295  	LEAQ -8(AX)(CX*1), CX // CX = dst_base + dst_len - 8
20296  	XORQ DI, DI // no previous match offset yet
20297  
20298  lz4s_s2_loop:
       	// Per-token entry checks: src must not be exhausted and dst must be
       	// below the write limit.
20299  	CMPQ    DX, BX
20300  	JAE     lz4s_s2_corrupt
20301  	CMPQ    AX, CX
20302  	JAE     lz4s_s2_dstfull
20303  	MOVBQZX (DX), R8 // R8 = token byte
20304  	MOVQ    R8, R9
20305  	MOVQ    R8, R10
20306  	SHRQ    $0x04, R9 // R9 = high nibble: literal count
20307  	ANDQ    $0x0f, R10 // R10 = low nibble: match length code
20308  	CMPQ    R8, $0xf0
20309  	JB      lz4s_s2_ll_end // high nibble < 15: literal count is complete
20310  
20311  lz4s_s2_ll_loop:
       	// Extended literal count: add following bytes until one is not 0xff.
20312  	INCQ    DX
20313  	CMPQ    DX, BX
20314  	JAE     lz4s_s2_corrupt
20315  	MOVBQZX (DX), R8
20316  	ADDQ    R8, R9
20317  	CMPQ    R8, $0xff
20318  	JEQ     lz4s_s2_ll_loop
20319  
20320  lz4s_s2_ll_end:
20321  	LEAQ  (DX)(R9*1), R8 // R8 = DX + literal count (DX is still on the last header byte)
20322  	ADDQ  $0x03, R10 // match length = code + 3
20323  	CMPQ  R8, BX
20324  	JAE   lz4s_s2_corrupt // literals would run past the end of src
20325  	INCQ  DX // DX = first literal byte
20326  	INCQ  R8 // R8 = first src byte after the literals
20327  	TESTQ R9, R9
20328  	JZ    lz4s_s2_lits_done // no literals in this token
20329  	LEAQ  (AX)(R9*1), R11
20330  	CMPQ  R11, CX
20331  	JAE   lz4s_s2_dstfull // literals would reach the dst write limit
20332  	ADDQ  R9, SI
       	// Literal header: R11 = count - 1 selects a 1, 2, 3, 4 or 5 byte form.
20333  	LEAL  -1(R9), R11
20334  	CMPL  R11, $0x3c
20335  	JB    one_byte_lz4s_s2
20336  	CMPL  R11, $0x00000100
20337  	JB    two_bytes_lz4s_s2
20338  	CMPL  R11, $0x00010000
20339  	JB    three_bytes_lz4s_s2
20340  	CMPL  R11, $0x01000000
20341  	JB    four_bytes_lz4s_s2
       	// Five-byte header: tag 0xfc followed by the 32-bit value of R11.
20342  	MOVB  $0xfc, (AX)
20343  	MOVL  R11, 1(AX)
20344  	ADDQ  $0x05, AX
20345  	JMP   memmove_long_lz4s_s2
20346  
20347  four_bytes_lz4s_s2:
       	// Four-byte header: tag 0xf8 followed by the low 24 bits of R11.
20348  	MOVL R11, R12
20349  	SHRL $0x10, R12
20350  	MOVB $0xf8, (AX)
20351  	MOVW R11, 1(AX)
20352  	MOVB R12, 3(AX)
20353  	ADDQ $0x04, AX
20354  	JMP  memmove_long_lz4s_s2
20355  
20356  three_bytes_lz4s_s2:
       	// Three-byte header: tag 0xf4 followed by the low 16 bits of R11.
20357  	MOVB $0xf4, (AX)
20358  	MOVW R11, 1(AX)
20359  	ADDQ $0x03, AX
20360  	JMP  memmove_long_lz4s_s2
20361  
20362  two_bytes_lz4s_s2:
       	// Two-byte header: tag 0xf0 followed by the low byte of R11.
20363  	MOVB $0xf0, (AX)
20364  	MOVB R11, 1(AX)
20365  	ADDQ $0x02, AX
20366  	CMPL R11, $0x40
20367  	JB   memmove_lz4s_s2 // count <= 64 uses the short copy
20368  	JMP  memmove_long_lz4s_s2
20369  
20370  one_byte_lz4s_s2:
       	// One-byte header: R11 shifted left by two.
20371  	SHLB $0x02, R11
20372  	MOVB R11, (AX)
20373  	ADDQ $0x01, AX
20374  
20375  memmove_lz4s_s2:
20376  	LEAQ (AX)(R9*1), R11 // R11 = dst pointer after the literals
20377  
20378  	// genMemMoveShort
       	// Copies R9 (at most 64) bytes from DX to AX using possibly overlapping
       	// fixed-size moves picked by size class.
20379  	CMPQ R9, $0x08
20380  	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8
20381  	CMPQ R9, $0x10
20382  	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8through16
20383  	CMPQ R9, $0x20
20384  	JBE  emit_lit_memmove_lz4s_s2_memmove_move_17through32
20385  	JMP  emit_lit_memmove_lz4s_s2_memmove_move_33through64
20386  
20387  emit_lit_memmove_lz4s_s2_memmove_move_8:
       	// Always moves a full 8 bytes; only R11 decides how far AX advances.
20388  	MOVQ (DX), R12
20389  	MOVQ R12, (AX)
20390  	JMP  memmove_end_copy_lz4s_s2
20391  
20392  emit_lit_memmove_lz4s_s2_memmove_move_8through16:
       	// First 8 and last 8 bytes. DX is overwritten here and reloaded from
       	// R8 at lz4s_s2_lits_emit_done.
20393  	MOVQ (DX), R12
20394  	MOVQ -8(DX)(R9*1), DX
20395  	MOVQ R12, (AX)
20396  	MOVQ DX, -8(AX)(R9*1)
20397  	JMP  memmove_end_copy_lz4s_s2
20398  
20399  emit_lit_memmove_lz4s_s2_memmove_move_17through32:
       	// First 16 and last 16 bytes.
20400  	MOVOU (DX), X0
20401  	MOVOU -16(DX)(R9*1), X1
20402  	MOVOU X0, (AX)
20403  	MOVOU X1, -16(AX)(R9*1)
20404  	JMP   memmove_end_copy_lz4s_s2
20405  
20406  emit_lit_memmove_lz4s_s2_memmove_move_33through64:
       	// First 32 and last 32 bytes.
20407  	MOVOU (DX), X0
20408  	MOVOU 16(DX), X1
20409  	MOVOU -32(DX)(R9*1), X2
20410  	MOVOU -16(DX)(R9*1), X3
20411  	MOVOU X0, (AX)
20412  	MOVOU X1, 16(AX)
20413  	MOVOU X2, -32(AX)(R9*1)
20414  	MOVOU X3, -16(AX)(R9*1)
20415  
20416  memmove_end_copy_lz4s_s2:
20417  	MOVQ R11, AX // advance dst past the literals
20418  	JMP  lz4s_s2_lits_emit_done
20419  
20420  memmove_long_lz4s_s2:
20421  	LEAQ (AX)(R9*1), R11 // R11 = dst pointer after the literals
20422  
20423  	// genMemMoveLong
       	// The first and last 32 source bytes are loaded into X0-X3 up front and
       	// stored last; the loops in between move 32-byte chunks.
20424  	MOVOU (DX), X0
20425  	MOVOU 16(DX), X1
20426  	MOVOU -32(DX)(R9*1), X2
20427  	MOVOU -16(DX)(R9*1), X3
20428  	MOVQ  R9, R13
20429  	SHRQ  $0x05, R13 // R13 = R9 / 32
20430  	MOVQ  AX, R12
20431  	ANDL  $0x0000001f, R12
       	// R14 = 64 - (AX & 31), so -32(AX)(R14*1) is 32-byte aligned as the
       	// MOVOA stores below require an aligned destination.
20432  	MOVQ  $0x00000040, R14
20433  	SUBQ  R12, R14
20434  	DECQ  R13
20435  	JA    emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
20436  	LEAQ  -32(DX)(R14*1), R12
20437  	LEAQ  -32(AX)(R14*1), R15
20438  
20439  emit_lit_memmove_long_lz4s_s2large_big_loop_back:
20440  	MOVOU (R12), X4
20441  	MOVOU 16(R12), X5
20442  	MOVOA X4, (R15)
20443  	MOVOA X5, 16(R15)
20444  	ADDQ  $0x20, R15
20445  	ADDQ  $0x20, R12
20446  	ADDQ  $0x20, R14
20447  	DECQ  R13
20448  	JNA   emit_lit_memmove_long_lz4s_s2large_big_loop_back
20449  
20450  emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32:
       	// Move 32 bytes ending at offset R14, then step R14 by 32 while it does
       	// not exceed R9.
20451  	MOVOU -32(DX)(R14*1), X4
20452  	MOVOU -16(DX)(R14*1), X5
20453  	MOVOA X4, -32(AX)(R14*1)
20454  	MOVOA X5, -16(AX)(R14*1)
20455  	ADDQ  $0x20, R14
20456  	CMPQ  R9, R14
20457  	JAE   emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
       	// Store the saved head and tail, covering the unaligned edges.
20458  	MOVOU X0, (AX)
20459  	MOVOU X1, 16(AX)
20460  	MOVOU X2, -32(AX)(R9*1)
20461  	MOVOU X3, -16(AX)(R9*1)
20462  	MOVQ  R11, AX // advance dst past the literals
20463  
20464  lz4s_s2_lits_emit_done:
20465  	MOVQ R8, DX // DX = first src byte after the literals
20466  
20467  lz4s_s2_lits_done:
       	// Reaching the end of src is only accepted when the token carried no
       	// match (R10 == 3, i.e. match length code 0).
20468  	CMPQ DX, BX
20469  	JNE  lz4s_s2_match
20470  	CMPQ R10, $0x03
20471  	JEQ  lz4s_s2_done
20472  	JMP  lz4s_s2_corrupt
20473  
20474  lz4s_s2_match:
20475  	CMPQ    R10, $0x03
20476  	JEQ     lz4s_s2_loop // match length code 0: no match, next token
20477  	LEAQ    2(DX), R8
20478  	CMPQ    R8, BX
20479  	JAE     lz4s_s2_corrupt // need the 2 offset bytes plus at least one more src byte
20480  	MOVWQZX (DX), R9 // R9 = 16-bit match offset
20481  	MOVQ    R8, DX
20482  	TESTQ   R9, R9
20483  	JZ      lz4s_s2_corrupt // offset 0 is rejected
20484  	CMPQ    R9, SI
20485  	JA      lz4s_s2_corrupt // offset reaches back before the start of the output
20486  	CMPQ    R10, $0x12
20487  	JNE     lz4s_s2_ml_done // code < 15: match length is complete
20488  
20489  lz4s_s2_ml_loop:
       	// Extended match length: add following bytes until one is not 0xff.
20490  	MOVBQZX (DX), R8
20491  	INCQ    DX
20492  	ADDQ    R8, R10
20493  	CMPQ    DX, BX
20494  	JAE     lz4s_s2_corrupt
20495  	CMPQ    R8, $0xff
20496  	JEQ     lz4s_s2_ml_loop
20497  
20498  lz4s_s2_ml_done:
20499  	ADDQ R10, SI
20500  	CMPQ R9, DI
20501  	JNE  lz4s_s2_docopy // offset differs from the previous match: emit a copy
20502  
20503  	// emitRepeat
       	// Same offset as the previous match. R8 holds the length still to
       	// encode and R10 that length minus 4; the branches pick a 2, 3, 4 or
       	// 5 byte form.
20504  emit_repeat_again_lz4_s2:
20505  	MOVL R10, R8
20506  	LEAL -4(R10), R10
20507  	CMPL R8, $0x08
20508  	JBE  repeat_two_lz4_s2
20509  	CMPL R8, $0x0c
20510  	JAE  cant_repeat_two_offset_lz4_s2
20511  	CMPL R9, $0x00000800
20512  	JB   repeat_two_offset_lz4_s2
20513  
20514  cant_repeat_two_offset_lz4_s2:
20515  	CMPL R10, $0x00000104
20516  	JB   repeat_three_lz4_s2
20517  	CMPL R10, $0x00010100
20518  	JB   repeat_four_lz4_s2
20519  	CMPL R10, $0x0100ffff
20520  	JB   repeat_five_lz4_s2
       	// Too long for one op: write a fixed 5-byte op, reduce R10 by the
       	// constant below and go round again.
20521  	LEAL -16842747(R10), R10
20522  	MOVL $0xfffb001d, (AX)
20523  	MOVB $0xff, 4(AX)
20524  	ADDQ $0x05, AX
20525  	JMP  emit_repeat_again_lz4_s2
20526  
20527  repeat_five_lz4_s2:
       	// 5 bytes: 0x1d, 0x00, then the low 24 bits of R10 - 65536.
20528  	LEAL -65536(R10), R10
20529  	MOVL R10, R9
20530  	MOVW $0x001d, (AX)
20531  	MOVW R10, 2(AX)
20532  	SARL $0x10, R9
20533  	MOVB R9, 4(AX)
20534  	ADDQ $0x05, AX
20535  	JMP  lz4s_s2_loop
20536  
20537  repeat_four_lz4_s2:
       	// 4 bytes: 0x19, 0x00, then the low 16 bits of R10 - 256.
20538  	LEAL -256(R10), R10
20539  	MOVW $0x0019, (AX)
20540  	MOVW R10, 2(AX)
20541  	ADDQ $0x04, AX
20542  	JMP  lz4s_s2_loop
20543  
20544  repeat_three_lz4_s2:
       	// 3 bytes: 0x15, 0x00, then the low byte of R10 - 4.
20545  	LEAL -4(R10), R10
20546  	MOVW $0x0015, (AX)
20547  	MOVB R10, 2(AX)
20548  	ADDQ $0x03, AX
20549  	JMP  lz4s_s2_loop
20550  
20551  repeat_two_lz4_s2:
       	// 2 bytes: the 16-bit value (R10 << 2) | 1.
20552  	SHLL $0x02, R10
20553  	ORL  $0x01, R10
20554  	MOVW R10, (AX)
20555  	ADDQ $0x02, AX
20556  	JMP  lz4s_s2_loop
20557  
20558  repeat_two_offset_lz4_s2:
       	// 2 bytes: first byte is (R10 * 4 + 1) | ((R9 >> 8) << 5), second byte
       	// is the low byte of R9.
20559  	XORQ R8, R8
20560  	LEAL 1(R8)(R10*4), R10
20561  	MOVB R9, 1(AX)
20562  	SARL $0x08, R9
20563  	SHLL $0x05, R9
20564  	ORL  R9, R10
20565  	MOVB R10, (AX)
20566  	ADDQ $0x02, AX
20567  	JMP  lz4s_s2_loop
20568  
20569  lz4s_s2_docopy:
20570  	MOVQ R9, DI // remember this offset for the next repeat test
20571  
20572  	// emitCopy
20573  	CMPL R10, $0x40
20574  	JBE  two_byte_offset_short_lz4_s2 // length <= 64: a single copy op
20575  	CMPL R9, $0x00000800
20576  	JAE  long_offset_short_lz4_s2 // offset >= 2048
       	// Length > 64 and offset < 2048: write a 2-byte copy op, take 8 off
       	// the length and encode the rest as a repeat.
20577  	MOVL $0x00000001, R8
20578  	LEAL 16(R8), R8 // R8 = 17
20579  	MOVB R9, 1(AX)
20580  	MOVL R9, R11
20581  	SHRL $0x08, R11
20582  	SHLL $0x05, R11
20583  	ORL  R11, R8
20584  	MOVB R8, (AX)
20585  	ADDQ $0x02, AX
20586  	SUBL $0x08, R10
20587  
20588  	// emitRepeat
20589  	LEAL -4(R10), R10
20590  	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
20591  
20592  emit_repeat_again_lz4_s2_emit_copy_short_2b:
20593  	MOVL R10, R8
20594  	LEAL -4(R10), R10
20595  	CMPL R8, $0x08
20596  	JBE  repeat_two_lz4_s2_emit_copy_short_2b
20597  	CMPL R8, $0x0c
20598  	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
20599  	CMPL R9, $0x00000800
20600  	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
20601  
20602  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
20603  	CMPL R10, $0x00000104
20604  	JB   repeat_three_lz4_s2_emit_copy_short_2b
20605  	CMPL R10, $0x00010100
20606  	JB   repeat_four_lz4_s2_emit_copy_short_2b
20607  	CMPL R10, $0x0100ffff
20608  	JB   repeat_five_lz4_s2_emit_copy_short_2b
20609  	LEAL -16842747(R10), R10
20610  	MOVL $0xfffb001d, (AX)
20611  	MOVB $0xff, 4(AX)
20612  	ADDQ $0x05, AX
20613  	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
20614  
20615  repeat_five_lz4_s2_emit_copy_short_2b:
20616  	LEAL -65536(R10), R10
20617  	MOVL R10, R9
20618  	MOVW $0x001d, (AX)
20619  	MOVW R10, 2(AX)
20620  	SARL $0x10, R9
20621  	MOVB R9, 4(AX)
20622  	ADDQ $0x05, AX
20623  	JMP  lz4s_s2_loop
20624  
20625  repeat_four_lz4_s2_emit_copy_short_2b:
20626  	LEAL -256(R10), R10
20627  	MOVW $0x0019, (AX)
20628  	MOVW R10, 2(AX)
20629  	ADDQ $0x04, AX
20630  	JMP  lz4s_s2_loop
20631  
20632  repeat_three_lz4_s2_emit_copy_short_2b:
20633  	LEAL -4(R10), R10
20634  	MOVW $0x0015, (AX)
20635  	MOVB R10, 2(AX)
20636  	ADDQ $0x03, AX
20637  	JMP  lz4s_s2_loop
20638  
20639  repeat_two_lz4_s2_emit_copy_short_2b:
20640  	SHLL $0x02, R10
20641  	ORL  $0x01, R10
20642  	MOVW R10, (AX)
20643  	ADDQ $0x02, AX
20644  	JMP  lz4s_s2_loop
20645  
20646  repeat_two_offset_lz4_s2_emit_copy_short_2b:
20647  	XORQ R8, R8
20648  	LEAL 1(R8)(R10*4), R10
20649  	MOVB R9, 1(AX)
20650  	SARL $0x08, R9
20651  	SHLL $0x05, R9
20652  	ORL  R9, R10
20653  	MOVB R10, (AX)
20654  	ADDQ $0x02, AX
20655  	JMP  lz4s_s2_loop
20656  
20657  long_offset_short_lz4_s2:
       	// Length > 64 and offset >= 2048: write a 3-byte copy op (tag 0xee
       	// plus the 16-bit offset), take 60 off the length and encode the rest
       	// as a repeat.
20658  	MOVB $0xee, (AX)
20659  	MOVW R9, 1(AX)
20660  	LEAL -60(R10), R10
20661  	ADDQ $0x03, AX
20662  
20663  	// emitRepeat
20664  emit_repeat_again_lz4_s2_emit_copy_short:
20665  	MOVL R10, R8
20666  	LEAL -4(R10), R10
20667  	CMPL R8, $0x08
20668  	JBE  repeat_two_lz4_s2_emit_copy_short
20669  	CMPL R8, $0x0c
20670  	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
20671  	CMPL R9, $0x00000800
20672  	JB   repeat_two_offset_lz4_s2_emit_copy_short
20673  
20674  cant_repeat_two_offset_lz4_s2_emit_copy_short:
20675  	CMPL R10, $0x00000104
20676  	JB   repeat_three_lz4_s2_emit_copy_short
20677  	CMPL R10, $0x00010100
20678  	JB   repeat_four_lz4_s2_emit_copy_short
20679  	CMPL R10, $0x0100ffff
20680  	JB   repeat_five_lz4_s2_emit_copy_short
20681  	LEAL -16842747(R10), R10
20682  	MOVL $0xfffb001d, (AX)
20683  	MOVB $0xff, 4(AX)
20684  	ADDQ $0x05, AX
20685  	JMP  emit_repeat_again_lz4_s2_emit_copy_short
20686  
20687  repeat_five_lz4_s2_emit_copy_short:
20688  	LEAL -65536(R10), R10
20689  	MOVL R10, R9
20690  	MOVW $0x001d, (AX)
20691  	MOVW R10, 2(AX)
20692  	SARL $0x10, R9
20693  	MOVB R9, 4(AX)
20694  	ADDQ $0x05, AX
20695  	JMP  lz4s_s2_loop
20696  
20697  repeat_four_lz4_s2_emit_copy_short:
20698  	LEAL -256(R10), R10
20699  	MOVW $0x0019, (AX)
20700  	MOVW R10, 2(AX)
20701  	ADDQ $0x04, AX
20702  	JMP  lz4s_s2_loop
20703  
20704  repeat_three_lz4_s2_emit_copy_short:
20705  	LEAL -4(R10), R10
20706  	MOVW $0x0015, (AX)
20707  	MOVB R10, 2(AX)
20708  	ADDQ $0x03, AX
20709  	JMP  lz4s_s2_loop
20710  
20711  repeat_two_lz4_s2_emit_copy_short:
20712  	SHLL $0x02, R10
20713  	ORL  $0x01, R10
20714  	MOVW R10, (AX)
20715  	ADDQ $0x02, AX
20716  	JMP  lz4s_s2_loop
20717  
20718  repeat_two_offset_lz4_s2_emit_copy_short:
20719  	XORQ R8, R8
20720  	LEAL 1(R8)(R10*4), R10
20721  	MOVB R9, 1(AX)
20722  	SARL $0x08, R9
20723  	SHLL $0x05, R9
20724  	ORL  R9, R10
20725  	MOVB R10, (AX)
20726  	ADDQ $0x02, AX
20727  	JMP  lz4s_s2_loop
20728  
20729  two_byte_offset_short_lz4_s2:
       	// Length <= 64. R8 = length << 2. The 2-byte form is used only when
       	// length < 12 and offset < 2048; otherwise the 3-byte form is used.
20730  	MOVL R10, R8
20731  	SHLL $0x02, R8
20732  	CMPL R10, $0x0c
20733  	JAE  emit_copy_three_lz4_s2
20734  	CMPL R9, $0x00000800
20735  	JAE  emit_copy_three_lz4_s2
       	// 2 bytes: first byte is (R8 - 15) | ((offset >> 8) << 5), second byte
       	// is the low byte of the offset.
20736  	LEAL -15(R8), R8
20737  	MOVB R9, 1(AX)
20738  	SHRL $0x08, R9
20739  	SHLL $0x05, R9
20740  	ORL  R9, R8
20741  	MOVB R8, (AX)
20742  	ADDQ $0x02, AX
20743  	JMP  lz4s_s2_loop
20744  
20745  emit_copy_three_lz4_s2:
       	// 3 bytes: first byte is R8 - 2, followed by the 16-bit offset.
20746  	LEAL -2(R8), R8
20747  	MOVB R8, (AX)
20748  	MOVW R9, 1(AX)
20749  	ADDQ $0x03, AX
20750  	JMP  lz4s_s2_loop
20751  
20752  lz4s_s2_done:
       	// Success: uncompressed = SI, dstUsed = AX - dst_base.
20753  	MOVQ dst_base+0(FP), CX
20754  	SUBQ CX, AX
20755  	MOVQ SI, uncompressed+48(FP)
20756  	MOVQ AX, dstUsed+56(FP)
20757  	RET
20758  
20759  lz4s_s2_corrupt:
       	// Rejected input: uncompressed = -1. dstUsed+56(FP) is not written on
       	// this path.
20760  	XORQ AX, AX
20761  	LEAQ -1(AX), SI
20762  	MOVQ SI, uncompressed+48(FP)
20763  	RET
20764  
20765  lz4s_s2_dstfull:
       	// Not enough room in dst: uncompressed = -2. dstUsed+56(FP) is not
       	// written on this path.
20766  	XORQ AX, AX
20767  	LEAQ -2(AX), SI
20768  	MOVQ SI, uncompressed+48(FP)
20769  	RET
20770  
20771  // func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
20772  // Requires: SSE2
       //
       // Walks the token stream in src and writes re-encoded literal and copy
       // operations to dst. Each token byte carries a literal count in its high
       // nibble and a match length code in its low nibble (match length =
       // code + 4). A nibble of 15 is extended by the following bytes, which are
       // added until one is not 0xff. Every match is written as a copy op; this
       // routine keeps no previous-offset state and emits no repeat ops.
       //
       // Results:
       //   uncompressed: sum of all literal counts and match lengths on success;
       //                 -1 when src is rejected (lz4_snappy_corrupt);
       //                 -2 when dst has too little room (lz4_snappy_dstfull).
       //   dstUsed:      bytes written to dst. Only stored on the success path.
       //
       // Register roles:
       //   AX  dst write pointer          CX  dst_base + dst_len - 8 (write limit)
       //   DX  src read pointer           BX  src_base + src_len (src end)
       //   SI  uncompressed byte count    R8  literal count, later the match offset
       //   R9  match length               DI, R10-R14, X0-X5 scratch
20773  TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64
20774  	XORQ SI, SI // SI = 0 bytes accounted for so far
20775  	MOVQ dst_base+0(FP), AX
20776  	MOVQ dst_len+8(FP), CX
20777  	MOVQ src_base+24(FP), DX
20778  	MOVQ src_len+32(FP), BX
20779  	LEAQ (DX)(BX*1), BX // BX = one past the last src byte
20780  	LEAQ -8(AX)(CX*1), CX // CX = dst_base + dst_len - 8
20781  
20782  lz4_snappy_loop:
       	// Per-token entry checks: src must not be exhausted and dst must be
       	// below the write limit.
20783  	CMPQ    DX, BX
20784  	JAE     lz4_snappy_corrupt
20785  	CMPQ    AX, CX
20786  	JAE     lz4_snappy_dstfull
20787  	MOVBQZX (DX), DI // DI = token byte
20788  	MOVQ    DI, R8
20789  	MOVQ    DI, R9
20790  	SHRQ    $0x04, R8 // R8 = high nibble: literal count
20791  	ANDQ    $0x0f, R9 // R9 = low nibble: match length code
20792  	CMPQ    DI, $0xf0
20793  	JB      lz4_snappy_ll_end // high nibble < 15: literal count is complete
20794  
20795  lz4_snappy_ll_loop:
       	// Extended literal count: add following bytes until one is not 0xff.
20796  	INCQ    DX
20797  	CMPQ    DX, BX
20798  	JAE     lz4_snappy_corrupt
20799  	MOVBQZX (DX), DI
20800  	ADDQ    DI, R8
20801  	CMPQ    DI, $0xff
20802  	JEQ     lz4_snappy_ll_loop
20803  
20804  lz4_snappy_ll_end:
20805  	LEAQ  (DX)(R8*1), DI // DI = DX + literal count (DX is still on the last header byte)
20806  	ADDQ  $0x04, R9 // match length = code + 4
20807  	CMPQ  DI, BX
20808  	JAE   lz4_snappy_corrupt // literals would run past the end of src
20809  	INCQ  DX // DX = first literal byte
20810  	INCQ  DI // DI = first src byte after the literals
20811  	TESTQ R8, R8
20812  	JZ    lz4_snappy_lits_done // no literals in this token
20813  	LEAQ  (AX)(R8*1), R10
20814  	CMPQ  R10, CX
20815  	JAE   lz4_snappy_dstfull // literals would reach the dst write limit
20816  	ADDQ  R8, SI
       	// Literal header: R10 = count - 1 selects a 1, 2, 3, 4 or 5 byte form.
20817  	LEAL  -1(R8), R10
20818  	CMPL  R10, $0x3c
20819  	JB    one_byte_lz4_snappy
20820  	CMPL  R10, $0x00000100
20821  	JB    two_bytes_lz4_snappy
20822  	CMPL  R10, $0x00010000
20823  	JB    three_bytes_lz4_snappy
20824  	CMPL  R10, $0x01000000
20825  	JB    four_bytes_lz4_snappy
       	// Five-byte header: tag 0xfc followed by the 32-bit value of R10.
20826  	MOVB  $0xfc, (AX)
20827  	MOVL  R10, 1(AX)
20828  	ADDQ  $0x05, AX
20829  	JMP   memmove_long_lz4_snappy
20830  
20831  four_bytes_lz4_snappy:
       	// Four-byte header: tag 0xf8 followed by the low 24 bits of R10.
20832  	MOVL R10, R11
20833  	SHRL $0x10, R11
20834  	MOVB $0xf8, (AX)
20835  	MOVW R10, 1(AX)
20836  	MOVB R11, 3(AX)
20837  	ADDQ $0x04, AX
20838  	JMP  memmove_long_lz4_snappy
20839  
20840  three_bytes_lz4_snappy:
       	// Three-byte header: tag 0xf4 followed by the low 16 bits of R10.
20841  	MOVB $0xf4, (AX)
20842  	MOVW R10, 1(AX)
20843  	ADDQ $0x03, AX
20844  	JMP  memmove_long_lz4_snappy
20845  
20846  two_bytes_lz4_snappy:
       	// Two-byte header: tag 0xf0 followed by the low byte of R10.
20847  	MOVB $0xf0, (AX)
20848  	MOVB R10, 1(AX)
20849  	ADDQ $0x02, AX
20850  	CMPL R10, $0x40
20851  	JB   memmove_lz4_snappy // count <= 64 uses the short copy
20852  	JMP  memmove_long_lz4_snappy
20853  
20854  one_byte_lz4_snappy:
       	// One-byte header: R10 shifted left by two.
20855  	SHLB $0x02, R10
20856  	MOVB R10, (AX)
20857  	ADDQ $0x01, AX
20858  
20859  memmove_lz4_snappy:
20860  	LEAQ (AX)(R8*1), R10 // R10 = dst pointer after the literals
20861  
20862  	// genMemMoveShort
       	// Copies R8 (at most 64) bytes from DX to AX using possibly overlapping
       	// fixed-size moves picked by size class.
20863  	CMPQ R8, $0x08
20864  	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8
20865  	CMPQ R8, $0x10
20866  	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8through16
20867  	CMPQ R8, $0x20
20868  	JBE  emit_lit_memmove_lz4_snappy_memmove_move_17through32
20869  	JMP  emit_lit_memmove_lz4_snappy_memmove_move_33through64
20870  
20871  emit_lit_memmove_lz4_snappy_memmove_move_8:
       	// Always moves a full 8 bytes; only R10 decides how far AX advances.
20872  	MOVQ (DX), R11
20873  	MOVQ R11, (AX)
20874  	JMP  memmove_end_copy_lz4_snappy
20875  
20876  emit_lit_memmove_lz4_snappy_memmove_move_8through16:
       	// First 8 and last 8 bytes. DX is overwritten here and reloaded from
       	// DI at lz4_snappy_lits_emit_done.
20877  	MOVQ (DX), R11
20878  	MOVQ -8(DX)(R8*1), DX
20879  	MOVQ R11, (AX)
20880  	MOVQ DX, -8(AX)(R8*1)
20881  	JMP  memmove_end_copy_lz4_snappy
20882  
20883  emit_lit_memmove_lz4_snappy_memmove_move_17through32:
       	// First 16 and last 16 bytes.
20884  	MOVOU (DX), X0
20885  	MOVOU -16(DX)(R8*1), X1
20886  	MOVOU X0, (AX)
20887  	MOVOU X1, -16(AX)(R8*1)
20888  	JMP   memmove_end_copy_lz4_snappy
20889  
20890  emit_lit_memmove_lz4_snappy_memmove_move_33through64:
       	// First 32 and last 32 bytes.
20891  	MOVOU (DX), X0
20892  	MOVOU 16(DX), X1
20893  	MOVOU -32(DX)(R8*1), X2
20894  	MOVOU -16(DX)(R8*1), X3
20895  	MOVOU X0, (AX)
20896  	MOVOU X1, 16(AX)
20897  	MOVOU X2, -32(AX)(R8*1)
20898  	MOVOU X3, -16(AX)(R8*1)
20899  
20900  memmove_end_copy_lz4_snappy:
20901  	MOVQ R10, AX // advance dst past the literals
20902  	JMP  lz4_snappy_lits_emit_done
20903  
20904  memmove_long_lz4_snappy:
20905  	LEAQ (AX)(R8*1), R10 // R10 = dst pointer after the literals
20906  
20907  	// genMemMoveLong
       	// The first and last 32 source bytes are loaded into X0-X3 up front and
       	// stored last; the loops in between move 32-byte chunks.
20908  	MOVOU (DX), X0
20909  	MOVOU 16(DX), X1
20910  	MOVOU -32(DX)(R8*1), X2
20911  	MOVOU -16(DX)(R8*1), X3
20912  	MOVQ  R8, R12
20913  	SHRQ  $0x05, R12 // R12 = R8 / 32
20914  	MOVQ  AX, R11
20915  	ANDL  $0x0000001f, R11
       	// R13 = 64 - (AX & 31), so -32(AX)(R13*1) is 32-byte aligned as the
       	// MOVOA stores below require an aligned destination.
20916  	MOVQ  $0x00000040, R13
20917  	SUBQ  R11, R13
20918  	DECQ  R12
20919  	JA    emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32
20920  	LEAQ  -32(DX)(R13*1), R11
20921  	LEAQ  -32(AX)(R13*1), R14
20922  
20923  emit_lit_memmove_long_lz4_snappylarge_big_loop_back:
20924  	MOVOU (R11), X4
20925  	MOVOU 16(R11), X5
20926  	MOVOA X4, (R14)
20927  	MOVOA X5, 16(R14)
20928  	ADDQ  $0x20, R14
20929  	ADDQ  $0x20, R11
20930  	ADDQ  $0x20, R13
20931  	DECQ  R12
20932  	JNA   emit_lit_memmove_long_lz4_snappylarge_big_loop_back
20933  
20934  emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32:
       	// Move 32 bytes ending at offset R13, then step R13 by 32 while it does
       	// not exceed R8.
20935  	MOVOU -32(DX)(R13*1), X4
20936  	MOVOU -16(DX)(R13*1), X5
20937  	MOVOA X4, -32(AX)(R13*1)
20938  	MOVOA X5, -16(AX)(R13*1)
20939  	ADDQ  $0x20, R13
20940  	CMPQ  R8, R13
20941  	JAE   emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32
       	// Store the saved head and tail, covering the unaligned edges.
20942  	MOVOU X0, (AX)
20943  	MOVOU X1, 16(AX)
20944  	MOVOU X2, -32(AX)(R8*1)
20945  	MOVOU X3, -16(AX)(R8*1)
20946  	MOVQ  R10, AX // advance dst past the literals
20947  
20948  lz4_snappy_lits_emit_done:
20949  	MOVQ DI, DX // DX = first src byte after the literals
20950  
20951  lz4_snappy_lits_done:
       	// Reaching the end of src is only accepted when the match length code
       	// was 0 (R9 == 4).
20952  	CMPQ DX, BX
20953  	JNE  lz4_snappy_match
20954  	CMPQ R9, $0x04
20955  	JEQ  lz4_snappy_done
20956  	JMP  lz4_snappy_corrupt
20957  
20958  lz4_snappy_match:
20959  	LEAQ    2(DX), DI
20960  	CMPQ    DI, BX
20961  	JAE     lz4_snappy_corrupt // need the 2 offset bytes plus at least one more src byte
20962  	MOVWQZX (DX), R8 // R8 = 16-bit match offset
20963  	MOVQ    DI, DX
20964  	TESTQ   R8, R8
20965  	JZ      lz4_snappy_corrupt // offset 0 is rejected
20966  	CMPQ    R8, SI
20967  	JA      lz4_snappy_corrupt // offset reaches back before the start of the output
20968  	CMPQ    R9, $0x13
20969  	JNE     lz4_snappy_ml_done // code < 15: match length is complete
20970  
20971  lz4_snappy_ml_loop:
       	// Extended match length: add following bytes until one is not 0xff.
20972  	MOVBQZX (DX), DI
20973  	INCQ    DX
20974  	ADDQ    DI, R9
20975  	CMPQ    DX, BX
20976  	JAE     lz4_snappy_corrupt
20977  	CMPQ    DI, $0xff
20978  	JEQ     lz4_snappy_ml_loop
20979  
20980  lz4_snappy_ml_done:
20981  	ADDQ R9, SI
20982  
20983  	// emitCopy
       	// While the length exceeds 64, write a 3-byte copy op (tag 0xee plus
       	// the 16-bit offset) and take 60 off the length. If AX reaches the
       	// write limit, control returns to lz4_snappy_loop and its entry checks
       	// decide the exit.
20984  two_byte_offset_lz4_s2:
20985  	CMPL R9, $0x40
20986  	JBE  two_byte_offset_short_lz4_s2
20987  	MOVB $0xee, (AX)
20988  	MOVW R8, 1(AX)
20989  	LEAL -60(R9), R9
20990  	ADDQ $0x03, AX
20991  	CMPQ AX, CX
20992  	JAE  lz4_snappy_loop
20993  	JMP  two_byte_offset_lz4_s2
20994  
20995  two_byte_offset_short_lz4_s2:
       	// Length <= 64. DI = length << 2. The 2-byte form is used only when
       	// length < 12 and offset < 2048; otherwise the 3-byte form is used.
20996  	MOVL R9, DI
20997  	SHLL $0x02, DI
20998  	CMPL R9, $0x0c
20999  	JAE  emit_copy_three_lz4_s2
21000  	CMPL R8, $0x00000800
21001  	JAE  emit_copy_three_lz4_s2
       	// 2 bytes: first byte is (DI - 15) | ((offset >> 8) << 5), second byte
       	// is the low byte of the offset.
21002  	LEAL -15(DI), DI
21003  	MOVB R8, 1(AX)
21004  	SHRL $0x08, R8
21005  	SHLL $0x05, R8
21006  	ORL  R8, DI
21007  	MOVB DI, (AX)
21008  	ADDQ $0x02, AX
21009  	JMP  lz4_snappy_loop
21010  
21011  emit_copy_three_lz4_s2:
       	// 3 bytes: first byte is DI - 2, followed by the 16-bit offset.
21012  	LEAL -2(DI), DI
21013  	MOVB DI, (AX)
21014  	MOVW R8, 1(AX)
21015  	ADDQ $0x03, AX
21016  	JMP  lz4_snappy_loop
21017  
21018  lz4_snappy_done:
       	// Success: uncompressed = SI, dstUsed = AX - dst_base.
21019  	MOVQ dst_base+0(FP), CX
21020  	SUBQ CX, AX
21021  	MOVQ SI, uncompressed+48(FP)
21022  	MOVQ AX, dstUsed+56(FP)
21023  	RET
21024  
21025  lz4_snappy_corrupt:
       	// Rejected input: uncompressed = -1. dstUsed+56(FP) is not written on
       	// this path.
21026  	XORQ AX, AX
21027  	LEAQ -1(AX), SI
21028  	MOVQ SI, uncompressed+48(FP)
21029  	RET
21030  
21031  lz4_snappy_dstfull:
       	// Not enough room in dst: uncompressed = -2. dstUsed+56(FP) is not
       	// written on this path.
21032  	XORQ AX, AX
21033  	LEAQ -2(AX), SI
21034  	MOVQ SI, uncompressed+48(FP)
21035  	RET
21036  
21037  // func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
21038  // Requires: SSE2
21039  TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64
21040  	XORQ SI, SI
21041  	MOVQ dst_base+0(FP), AX
21042  	MOVQ dst_len+8(FP), CX
21043  	MOVQ src_base+24(FP), DX
21044  	MOVQ src_len+32(FP), BX
21045  	LEAQ (DX)(BX*1), BX
21046  	LEAQ -8(AX)(CX*1), CX
21047  
21048  lz4s_snappy_loop:
21049  	CMPQ    DX, BX
21050  	JAE     lz4s_snappy_corrupt
21051  	CMPQ    AX, CX
21052  	JAE     lz4s_snappy_dstfull
21053  	MOVBQZX (DX), DI
21054  	MOVQ    DI, R8
21055  	MOVQ    DI, R9
21056  	SHRQ    $0x04, R8
21057  	ANDQ    $0x0f, R9
21058  	CMPQ    DI, $0xf0
21059  	JB      lz4s_snappy_ll_end
21060  
21061  lz4s_snappy_ll_loop:
21062  	INCQ    DX
21063  	CMPQ    DX, BX
21064  	JAE     lz4s_snappy_corrupt
21065  	MOVBQZX (DX), DI
21066  	ADDQ    DI, R8
21067  	CMPQ    DI, $0xff
21068  	JEQ     lz4s_snappy_ll_loop
21069  
21070  lz4s_snappy_ll_end:
21071  	LEAQ  (DX)(R8*1), DI
21072  	ADDQ  $0x03, R9
21073  	CMPQ  DI, BX
21074  	JAE   lz4s_snappy_corrupt
21075  	INCQ  DX
21076  	INCQ  DI
21077  	TESTQ R8, R8
21078  	JZ    lz4s_snappy_lits_done
21079  	LEAQ  (AX)(R8*1), R10
21080  	CMPQ  R10, CX
21081  	JAE   lz4s_snappy_dstfull
21082  	ADDQ  R8, SI
21083  	LEAL  -1(R8), R10
21084  	CMPL  R10, $0x3c
21085  	JB    one_byte_lz4s_snappy
21086  	CMPL  R10, $0x00000100
21087  	JB    two_bytes_lz4s_snappy
21088  	CMPL  R10, $0x00010000
21089  	JB    three_bytes_lz4s_snappy
21090  	CMPL  R10, $0x01000000
21091  	JB    four_bytes_lz4s_snappy
21092  	MOVB  $0xfc, (AX)
21093  	MOVL  R10, 1(AX)
21094  	ADDQ  $0x05, AX
21095  	JMP   memmove_long_lz4s_snappy
21096  
21097  four_bytes_lz4s_snappy:
21098  	MOVL R10, R11
21099  	SHRL $0x10, R11
21100  	MOVB $0xf8, (AX)
21101  	MOVW R10, 1(AX)
21102  	MOVB R11, 3(AX)
21103  	ADDQ $0x04, AX
21104  	JMP  memmove_long_lz4s_snappy
21105  
21106  three_bytes_lz4s_snappy:
21107  	MOVB $0xf4, (AX)
21108  	MOVW R10, 1(AX)
21109  	ADDQ $0x03, AX
21110  	JMP  memmove_long_lz4s_snappy
21111  
21112  two_bytes_lz4s_snappy:
21113  	MOVB $0xf0, (AX)
21114  	MOVB R10, 1(AX)
21115  	ADDQ $0x02, AX
21116  	CMPL R10, $0x40
21117  	JB   memmove_lz4s_snappy
21118  	JMP  memmove_long_lz4s_snappy
21119  
21120  one_byte_lz4s_snappy:
21121  	SHLB $0x02, R10
21122  	MOVB R10, (AX)
21123  	ADDQ $0x01, AX
21124  
21125  memmove_lz4s_snappy:
21126  	LEAQ (AX)(R8*1), R10
21127  
21128  	// genMemMoveShort
	// Dispatch on the literal length R8: <= 8, <= 16, <= 32, otherwise the
	// 33..64 variant. Source is DX, destination is AX.
21129  	CMPQ R8, $0x08
21130  	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8
21131  	CMPQ R8, $0x10
21132  	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8through16
21133  	CMPQ R8, $0x20
21134  	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_17through32
21135  	JMP  emit_lit_memmove_lz4s_snappy_memmove_move_33through64
21136  
21137  emit_lit_memmove_lz4s_snappy_memmove_move_8:
	// R8 <= 8: one unconditional 8-byte load from (DX) and store to (AX),
	// whatever the exact length. Only R8 bytes are kept, because AX is
	// reset to R10 at memmove_end_copy_lz4s_snappy.
	// NOTE(review): this relies on 8 readable bytes at DX and 8 writable
	// bytes at AX; the margin is presumably arranged outside this chunk —
	// confirm.
21138  	MOVQ (DX), R11
21139  	MOVQ R11, (AX)
21140  	JMP  memmove_end_copy_lz4s_snappy
21141  
21142  emit_lit_memmove_lz4s_snappy_memmove_move_8through16:
	// 9..16 bytes: copy the first 8 and the last 8 bytes; the two qwords
	// may overlap. Both loads happen before either store.
21143  	MOVQ (DX), R11
	// DX is reused as a scratch register for the tail qword; the source
	// cursor is reloaded from DI at lz4s_snappy_lits_emit_done.
21144  	MOVQ -8(DX)(R8*1), DX
21145  	MOVQ R11, (AX)
21146  	MOVQ DX, -8(AX)(R8*1)
21147  	JMP  memmove_end_copy_lz4s_snappy
21148  
21149  emit_lit_memmove_lz4s_snappy_memmove_move_17through32:
	// 17..32 bytes: copy the first 16 and the last 16 bytes with unaligned
	// 128-bit moves; the two halves may overlap.
21150  	MOVOU (DX), X0
21151  	MOVOU -16(DX)(R8*1), X1
21152  	MOVOU X0, (AX)
21153  	MOVOU X1, -16(AX)(R8*1)
21154  	JMP   memmove_end_copy_lz4s_snappy
21155  
21156  emit_lit_memmove_lz4s_snappy_memmove_move_33through64:
	// 33..64 bytes: copy the first 32 and the last 32 bytes with four
	// unaligned 128-bit moves; the two halves may overlap. All loads happen
	// before any store. Falls through into memmove_end_copy_lz4s_snappy.
21157  	MOVOU (DX), X0
21158  	MOVOU 16(DX), X1
21159  	MOVOU -32(DX)(R8*1), X2
21160  	MOVOU -16(DX)(R8*1), X3
21161  	MOVOU X0, (AX)
21162  	MOVOU X1, 16(AX)
21163  	MOVOU X2, -32(AX)(R8*1)
21164  	MOVOU X3, -16(AX)(R8*1)
21165  
21166  memmove_end_copy_lz4s_snappy:
	// Short copy finished: move the output cursor to R10 (AX + R8, computed
	// at memmove_lz4s_snappy).
21167  	MOVQ R10, AX
21168  	JMP  lz4s_snappy_lits_emit_done
21169  
21170  memmove_long_lz4s_snappy:
	// Long literal copy. Every jump to this label comes from a path where
	// length-1 >= 0x40. R10 = AX + R8, the output position after the
	// literal bytes.
21171  	LEAQ (AX)(R8*1), R10
21172  
21173  	// genMemMoveLong
	// Preload the first 32 source bytes (X0, X1) and the last 32 (X2, X3);
	// they are stored after the main loop to cover the unaligned head and
	// tail.
21174  	MOVOU (DX), X0
21175  	MOVOU 16(DX), X1
21176  	MOVOU -32(DX)(R8*1), X2
21177  	MOVOU -16(DX)(R8*1), X3
	// R12 = R8 / 32.
21178  	MOVQ  R8, R12
21179  	SHRQ  $0x05, R12
	// R13 = 64 - (AX & 31), so AX + R13 - 32 is a 32-byte-aligned
	// destination address.
21180  	MOVQ  AX, R11
21181  	ANDL  $0x0000001f, R11
21182  	MOVQ  $0x00000040, R13
21183  	SUBQ  R11, R13
	// NOTE(review): DECQ does not modify CF, so this JA tests the CF left by
	// the SUBQ above together with the ZF from DECQ — confirm this is the
	// intended branch condition.
21184  	DECQ  R12
21185  	JA    emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
	// Pointer form of the same positions: R11 = DX + R13 - 32 (source),
	// R14 = AX + R13 - 32 (destination).
21186  	LEAQ  -32(DX)(R13*1), R11
21187  	LEAQ  -32(AX)(R13*1), R14
21188  
21189  emit_lit_memmove_long_lz4s_snappylarge_big_loop_back:
	// Pointer-based loop: copy 32 bytes from R11 (unaligned loads) to R14
	// (aligned stores), advance both pointers and the offset R13 by 32, and
	// decrement the block counter R12. Falls through into the forward loop
	// when the JNA is not taken.
	// NOTE(review): JNA branches when CF or ZF is set, and DECQ leaves CF
	// unchanged, so the exit condition depends on CF from an earlier
	// instruction — confirm the intended iteration count.
21190  	MOVOU (R11), X4
21191  	MOVOU 16(R11), X5
21192  	MOVOA X4, (R14)
21193  	MOVOA X5, 16(R14)
21194  	ADDQ  $0x20, R14
21195  	ADDQ  $0x20, R11
21196  	ADDQ  $0x20, R13
21197  	DECQ  R12
21198  	JNA   emit_lit_memmove_long_lz4s_snappylarge_big_loop_back
21199  
21200  emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32:
	// Offset-based loop: copy the 32 bytes ending at offset R13 from DX to
	// AX (unaligned loads, aligned MOVOA stores), then add 32 to R13.
	// Repeats while R8 >= R13.
21201  	MOVOU -32(DX)(R13*1), X4
21202  	MOVOU -16(DX)(R13*1), X5
21203  	MOVOA X4, -32(AX)(R13*1)
21204  	MOVOA X5, -16(AX)(R13*1)
21205  	ADDQ  $0x20, R13
21206  	CMPQ  R8, R13
21207  	JAE   emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
	// Write the preloaded head (X0, X1) and tail (X2, X3) blocks so the
	// bytes before the first aligned store and after the last one are
	// filled in.
21208  	MOVOU X0, (AX)
21209  	MOVOU X1, 16(AX)
21210  	MOVOU X2, -32(AX)(R8*1)
21211  	MOVOU X3, -16(AX)(R8*1)
	// Move the output cursor past the literals (R10 = AX + R8). Falls
	// through into lz4s_snappy_lits_emit_done.
21212  	MOVQ  R10, AX
21213  
21214  lz4s_snappy_lits_emit_done:
	// Move the source cursor past the literal bytes: DI was set to DX + R8
	// at lz4s_snappy_ll_end. This also restores DX after the 8through16
	// copy used it as scratch.
21215  	MOVQ DI, DX
21216  
21217  lz4s_snappy_lits_done:
	// If DX has not landed exactly on BX there is more input: handle the
	// match part. Otherwise this is a clean finish only when R9 == 3, i.e.
	// R9 was 0 before the ADDQ $0x03 at lz4s_snappy_ll_end; any other value
	// is reported as corrupt.
21218  	CMPQ DX, BX
21219  	JNE  lz4s_snappy_match
21220  	CMPQ R9, $0x03
21221  	JEQ  lz4s_snappy_done
21222  	JMP  lz4s_snappy_corrupt
21223  
21224  lz4s_snappy_match:
	// R9 == 3 means R9 was 0 before the ADDQ $0x03 at lz4s_snappy_ll_end:
	// go back to lz4s_snappy_loop (defined before this chunk) without
	// emitting a copy.
21225  	CMPQ    R9, $0x03
21226  	JEQ     lz4s_snappy_loop
	// Read a 16-bit value at DX into R8 (used as the copy offset below) and
	// advance DX by two; DX + 2 must stay below BX.
21227  	LEAQ    2(DX), DI
21228  	CMPQ    DI, BX
21229  	JAE     lz4s_snappy_corrupt
21230  	MOVWQZX (DX), R8
21231  	MOVQ    DI, DX
	// Reject a zero offset, and an offset larger than SI (the running count
	// that is stored to `uncompressed` on success).
21232  	TESTQ   R8, R8
21233  	JZ      lz4s_snappy_corrupt
21234  	CMPQ    R8, SI
21235  	JA      lz4s_snappy_corrupt
	// 0x12 = 15 + 3. NOTE(review): presumably a 4-bit length field of 15
	// signals that extension bytes follow — confirm against the token
	// decoding outside this chunk. Any other value skips the extension loop.
21236  	CMPQ    R9, $0x12
21237  	JNE     lz4s_snappy_ml_done
21238  
21239  lz4s_snappy_ml_loop:
	// Extended match length: read the byte at DX, advance DX, add the byte
	// to R9, fail as corrupt if DX has reached BX, and repeat while the
	// byte equals 0xff.
21240  	MOVBQZX (DX), DI
21241  	INCQ    DX
21242  	ADDQ    DI, R9
21243  	CMPQ    DX, BX
21244  	JAE     lz4s_snappy_corrupt
21245  	CMPQ    DI, $0xff
21246  	JEQ     lz4s_snappy_ml_loop
21247  
21248  lz4s_snappy_ml_done:
	// Add the final match length R9 to SI, the running count that is stored
	// to `uncompressed` on success.
21249  	ADDQ R9, SI
21250  
21251  	// emitCopy
	// R9 = remaining copy length, R8 = offset, AX = output cursor.
21252  two_byte_offset_lz4_s2:
	// Lengths of at most 64 are finished by the short path.
21253  	CMPL R9, $0x40
21254  	JBE  two_byte_offset_short_lz4_s2
	// Otherwise emit a three-byte copy of length 60: tag 0xee (59<<2 | 2)
	// followed by the 16-bit offset, and subtract 60 from R9.
21255  	MOVB $0xee, (AX)
21256  	MOVW R8, 1(AX)
21257  	LEAL -60(R9), R9
21258  	ADDQ $0x03, AX
	// If AX has reached CX, jump back to lz4s_snappy_loop with the rest of
	// R9 not emitted.
	// NOTE(review): the loop head is before this chunk; presumably it
	// re-checks AX against CX and reports the destination as full — confirm.
21259  	CMPQ AX, CX
21260  	JAE  lz4s_snappy_loop
21261  	JMP  two_byte_offset_lz4_s2
21262  
21263  two_byte_offset_short_lz4_s2:
	// DI = R9 * 4, the length shifted into the tag's upper bits.
21264  	MOVL R9, DI
21265  	SHLL $0x02, DI
	// The two-byte form is used only when length < 12 and offset < 2048;
	// otherwise use the three-byte form.
21266  	CMPL R9, $0x0c
21267  	JAE  emit_copy_three_lz4_s2
21268  	CMPL R8, $0x00000800
21269  	JAE  emit_copy_three_lz4_s2
	// Two-byte copy: R9*4 - 15 == ((R9-4) << 2) | 1. The second byte is the
	// low 8 bits of the offset; offset bits 8..10 are OR-ed into tag bits
	// 5..7.
21270  	LEAL -15(DI), DI
21271  	MOVB R8, 1(AX)
21272  	SHRL $0x08, R8
21273  	SHLL $0x05, R8
21274  	ORL  R8, DI
21275  	MOVB DI, (AX)
21276  	ADDQ $0x02, AX
21277  	JMP  lz4s_snappy_loop
21278  
21279  emit_copy_three_lz4_s2:
	// Three-byte copy: DI holds R9*4 on entry, and R9*4 - 2 ==
	// ((R9-1) << 2) | 2. The tag is followed by the 16-bit offset in R8.
21280  	LEAL -2(DI), DI
21281  	MOVB DI, (AX)
21282  	MOVW R8, 1(AX)
21283  	ADDQ $0x03, AX
21284  	JMP  lz4s_snappy_loop
21285  
21286  lz4s_snappy_done:
	// Success: return uncompressed = SI and dstUsed = AX - dst_base, the
	// number of bytes written to dst.
21287  	MOVQ dst_base+0(FP), CX
21288  	SUBQ CX, AX
21289  	MOVQ SI, uncompressed+48(FP)
21290  	MOVQ AX, dstUsed+56(FP)
21291  	RET
21292  
21293  lz4s_snappy_corrupt:
	// Corrupt-input exit: return uncompressed = -1 (computed as 0 - 1).
	// dstUsed is not written on this path.
21294  	XORQ AX, AX
21295  	LEAQ -1(AX), SI
21296  	MOVQ SI, uncompressed+48(FP)
21297  	RET
21298  
21299  lz4s_snappy_dstfull:
	// Destination-full exit: return uncompressed = -2 (computed as 0 - 2).
	// dstUsed is not written on this path.
21300  	XORQ AX, AX
21301  	LEAQ -2(AX), SI
21302  	MOVQ SI, uncompressed+48(FP)
21303  	RET
21304