memcpy.c raw

   1  #include <string.h>
   2  #include <stdint.h>
   3  #include <endian.h>
   4  
   5  void *memcpy(void *restrict dest, const void *restrict src, size_t n)
   6  {
   7  	unsigned char *d = dest;
   8  	const unsigned char *s = src;
   9  
  10  #ifdef __GNUC__
  11  
  12  #if __BYTE_ORDER == __LITTLE_ENDIAN
  13  #define LS >>
  14  #define RS <<
  15  #else
  16  #define LS <<
  17  #define RS >>
  18  #endif
  19  
  20  	typedef uint32_t __attribute__((__may_alias__)) u32;
  21  	uint32_t w, x;
  22  
  23  	for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;
  24  
  25  	if ((uintptr_t)d % 4 == 0) {
  26  		for (; n>=16; s+=16, d+=16, n-=16) {
  27  			*(u32 *)(d+0) = *(u32 *)(s+0);
  28  			*(u32 *)(d+4) = *(u32 *)(s+4);
  29  			*(u32 *)(d+8) = *(u32 *)(s+8);
  30  			*(u32 *)(d+12) = *(u32 *)(s+12);
  31  		}
  32  		if (n&8) {
  33  			*(u32 *)(d+0) = *(u32 *)(s+0);
  34  			*(u32 *)(d+4) = *(u32 *)(s+4);
  35  			d += 8; s += 8;
  36  		}
  37  		if (n&4) {
  38  			*(u32 *)(d+0) = *(u32 *)(s+0);
  39  			d += 4; s += 4;
  40  		}
  41  		if (n&2) {
  42  			*d++ = *s++; *d++ = *s++;
  43  		}
  44  		if (n&1) {
  45  			*d = *s;
  46  		}
  47  		return dest;
  48  	}
  49  
  50  	if (n >= 32) switch ((uintptr_t)d % 4) {
  51  	case 1:
  52  		w = *(u32 *)s;
  53  		*d++ = *s++;
  54  		*d++ = *s++;
  55  		*d++ = *s++;
  56  		n -= 3;
  57  		for (; n>=17; s+=16, d+=16, n-=16) {
  58  			x = *(u32 *)(s+1);
  59  			*(u32 *)(d+0) = (w LS 24) | (x RS 8);
  60  			w = *(u32 *)(s+5);
  61  			*(u32 *)(d+4) = (x LS 24) | (w RS 8);
  62  			x = *(u32 *)(s+9);
  63  			*(u32 *)(d+8) = (w LS 24) | (x RS 8);
  64  			w = *(u32 *)(s+13);
  65  			*(u32 *)(d+12) = (x LS 24) | (w RS 8);
  66  		}
  67  		break;
  68  	case 2:
  69  		w = *(u32 *)s;
  70  		*d++ = *s++;
  71  		*d++ = *s++;
  72  		n -= 2;
  73  		for (; n>=18; s+=16, d+=16, n-=16) {
  74  			x = *(u32 *)(s+2);
  75  			*(u32 *)(d+0) = (w LS 16) | (x RS 16);
  76  			w = *(u32 *)(s+6);
  77  			*(u32 *)(d+4) = (x LS 16) | (w RS 16);
  78  			x = *(u32 *)(s+10);
  79  			*(u32 *)(d+8) = (w LS 16) | (x RS 16);
  80  			w = *(u32 *)(s+14);
  81  			*(u32 *)(d+12) = (x LS 16) | (w RS 16);
  82  		}
  83  		break;
  84  	case 3:
  85  		w = *(u32 *)s;
  86  		*d++ = *s++;
  87  		n -= 1;
  88  		for (; n>=19; s+=16, d+=16, n-=16) {
  89  			x = *(u32 *)(s+3);
  90  			*(u32 *)(d+0) = (w LS 8) | (x RS 24);
  91  			w = *(u32 *)(s+7);
  92  			*(u32 *)(d+4) = (x LS 8) | (w RS 24);
  93  			x = *(u32 *)(s+11);
  94  			*(u32 *)(d+8) = (w LS 8) | (x RS 24);
  95  			w = *(u32 *)(s+15);
  96  			*(u32 *)(d+12) = (x LS 8) | (w RS 24);
  97  		}
  98  		break;
  99  	}
 100  	if (n&16) {
 101  		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
 102  		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
 103  		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
 104  		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
 105  	}
 106  	if (n&8) {
 107  		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
 108  		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
 109  	}
 110  	if (n&4) {
 111  		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
 112  	}
 113  	if (n&2) {
 114  		*d++ = *s++; *d++ = *s++;
 115  	}
 116  	if (n&1) {
 117  		*d = *s;
 118  	}
 119  	return dest;
 120  #endif
 121  
 122  	for (; n; n--) *d++ = *s++;
 123  	return dest;
 124  }
 125