/* memset.S */

   1  /*
   2   * memset - fill memory with a constant byte
   3   *
   4   * Copyright (c) 2012-2020, Arm Limited.
   5   * SPDX-License-Identifier: MIT
   6   */
   7  
   8  /* Assumptions:
   9   *
  10   * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
  11   *
  12   */
  13  
/* Register aliases.  x0-x2 are the AAPCS64 arguments to memset.  */
#define dstin   x0      /* arg: destination pointer; never written, so x0 still holds it at ret */
#define val     x1      /* arg: fill value; later holds 8 replicated fill bytes (mov val, v0.D[0]) */
#define valw    w1      /* 32-bit view of the fill value */
#define count   x2      /* arg: number of bytes to set */
#define dst     x3      /* scratch: aligned/running store pointer on the long path */
#define dstend  x4      /* scratch: dstin + count (one past the last byte) */
#define zva_val x5      /* scratch: masked DCZID_EL0 field for the DC ZVA check */
  21  
.global memset
.type memset,%function
/*
 * void *memset (void *dstin, int val, size_t count)
 *
 * In:       x0 = dstin, w1 = fill value (low byte used), x2 = count
 * Out:      x0 = dstin (x0 is never written, so the original pointer
 *           is the return value)
 * Clobbers: x3-x5, v0, NZCV
 *
 * Strategy: small/medium sizes are handled with overlapping stores
 * from both ends, so no per-size loops are needed; large zeroing
 * uses DC ZVA when the 64-byte block size is confirmed.
 */
memset:

	dup     v0.16B, valw            /* replicate the fill byte into all 16 lanes */
	add     dstend, dstin, count    /* dstend = one past the last byte */

	cmp     count, 96
	b.hi    .Lset_long              /* > 96 bytes: aligned bulk path */
	cmp     count, 16
	b.hs    .Lset_medium            /* 16..96 bytes */
	mov     val, v0.D[0]            /* 8 replicated bytes for the GPR stores below */

	/* Set 0..15 bytes.  */
	tbz     count, 3, 1f            /* bit 3 clear -> fewer than 8 bytes */
	str     val, [dstin]            /* 8..15: two 8-byte stores; the overlap */
	str     val, [dstend, -8]       /* covers every length in the range */
	ret
	nop                             /* padding — presumably aligns 1f; confirm */
1:      tbz     count, 2, 2f            /* bit 2 clear -> fewer than 4 bytes */
	str     valw, [dstin]           /* 4..7: two overlapping 4-byte stores */
	str     valw, [dstend, -4]
	ret
2:      cbz     count, 3f               /* 0 bytes: nothing to store */
	strb    valw, [dstin]           /* 1..3: first byte */
	tbz     count, 1, 3f            /* bit 1 clear -> exactly 1 byte, done */
	strh    valw, [dstend, -2]      /* 2..3: halfword at the end overlaps as needed */
3:      ret

	/* Set 16..96 bytes.  */
.Lset_medium:
	str     q0, [dstin]
	tbnz    count, 6, .Lset96       /* bit 6 set -> 64..96 bytes */
	str     q0, [dstend, -16]       /* 16..63: head + tail stores overlap */
	tbz     count, 5, 1f            /* bit 5 clear -> 16..31, already covered */
	str     q0, [dstin, 16]         /* 32..63: two more stores in the middle */
	str     q0, [dstend, -32]
1:      ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
.Lset96:
	str     q0, [dstin, 16]         /* first 16 bytes were stored at .Lset_medium */
	stp     q0, q0, [dstin, 32]
	stp     q0, q0, [dstend, -32]
	ret

	.p2align 4
.Lset_long:
	and     valw, valw, 255         /* isolate the fill byte to test it for zero */
	bic     dst, dstin, 15          /* dst = dstin aligned down to 16 bytes */
	str     q0, [dstin]             /* unaligned head store */
	cmp     count, 160
	ccmp    valw, 0, 0, hs          /* if count >= 160, compare fill byte with 0;
					   otherwise force NZCV=0 so b.ne is taken */
	b.ne    .Lno_zva                /* DC ZVA only when zeroing >= 160 bytes */

#ifndef SKIP_ZVA_CHECK
	mrs     zva_val, dczid_el0      /* DCZID_EL0: block size in [3:0], DZP in bit 4 */
	and     zva_val, zva_val, 31    /* keeping bit 4 means DZP=1 also fails the test */
	cmp     zva_val, 4              /* ZVA size is 64 bytes.  */
	b.ne    .Lno_zva
#endif
	str     q0, [dst, 16]           /* fill up to the first 64-byte boundary */
	stp     q0, q0, [dst, 32]
	bic     dst, dst, 63            /* dst = aligned down to 64 bytes */
	sub     count, dstend, dst      /* Count is now 64 too large.  */
	sub     count, count, 128       /* Adjust count and bias for loop.  */

	.p2align 4
.Lzva_loop:
	add     dst, dst, 64
	dc      zva, dst                /* zero one whole 64-byte block */
	subs    count, count, 64
	b.hi    .Lzva_loop
	stp     q0, q0, [dstend, -64]   /* tail: last 64 bytes, may overlap the loop */
	stp     q0, q0, [dstend, -32]
	ret

.Lno_zva:
	sub     count, dstend, dst      /* Count is 16 too large.  */
	sub     dst, dst, 16            /* Dst is biased by -32.  */
	sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
.Lno_zva_loop:
	stp     q0, q0, [dst, 32]       /* 64 bytes per iteration via the biased dst */
	stp     q0, q0, [dst, 64]!      /* pre-index writeback advances dst by 64 */
	subs    count, count, 64
	b.hi    .Lno_zva_loop
	stp     q0, q0, [dstend, -64]   /* tail: final (possibly overlapping) 64 bytes */
	stp     q0, q0, [dstend, -32]
	ret

.size memset,.-memset
 115  
 116