| OLD | NEW |
| 1 #include <string.h> | 1 #include <string.h> |
| 2 #include <stdint.h> | 2 #include <stdint.h> |
| 3 #include <endian.h> | 3 #include <endian.h> |
| 4 | 4 |
| 5 void *memcpy(void *restrict dest, const void *restrict src, size_t n) | 5 void* memcpy(void* restrict dest, const void* restrict src, size_t n) { |
| 6 { | 6 unsigned char* d = dest; |
| 7 » unsigned char *d = dest; | 7 const unsigned char* s = src; |
| 8 » const unsigned char *s = src; | |
| 9 | 8 |
| 10 #ifdef __GNUC__ | 9 #ifdef __GNUC__ |
| 11 | 10 |
| 12 #if __BYTE_ORDER == __LITTLE_ENDIAN | 11 #if __BYTE_ORDER == __LITTLE_ENDIAN |
| 13 #define LS >> | 12 #define LS >> |
| 14 #define RS << | 13 #define RS << |
| 15 #else | 14 #else |
| 16 #define LS << | 15 #define LS << |
| 17 #define RS >> | 16 #define RS >> |
| 18 #endif | 17 #endif |
| 19 | 18 |
| 20 » typedef uint32_t __attribute__((__may_alias__)) u32; | 19 typedef uint32_t __attribute__((__may_alias__)) u32; |
| 21 » uint32_t w, x; | 20 uint32_t w, x; |
| 22 | 21 |
| 23 » for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++; | 22 for (; (uintptr_t)s % 4 && n; n--) |
| 23 *d++ = *s++; |
| 24 | 24 |
| 25 » if ((uintptr_t)d % 4 == 0) { | 25 if ((uintptr_t)d % 4 == 0) { |
| 26 » » for (; n>=16; s+=16, d+=16, n-=16) { | 26 for (; n >= 16; s += 16, d += 16, n -= 16) { |
| 27 » » » *(u32 *)(d+0) = *(u32 *)(s+0); | 27 *(u32*)(d + 0) = *(u32*)(s + 0); |
| 28 » » » *(u32 *)(d+4) = *(u32 *)(s+4); | 28 *(u32*)(d + 4) = *(u32*)(s + 4); |
| 29 » » » *(u32 *)(d+8) = *(u32 *)(s+8); | 29 *(u32*)(d + 8) = *(u32*)(s + 8); |
| 30 » » » *(u32 *)(d+12) = *(u32 *)(s+12); | 30 *(u32*)(d + 12) = *(u32*)(s + 12); |
| 31 » » } | 31 } |
| 32 » » if (n&8) { | 32 if (n & 8) { |
| 33 » » » *(u32 *)(d+0) = *(u32 *)(s+0); | 33 *(u32*)(d + 0) = *(u32*)(s + 0); |
| 34 » » » *(u32 *)(d+4) = *(u32 *)(s+4); | 34 *(u32*)(d + 4) = *(u32*)(s + 4); |
| 35 » » » d += 8; s += 8; | 35 d += 8; |
| 36 » » } | 36 s += 8; |
| 37 » » if (n&4) { | 37 } |
| 38 » » » *(u32 *)(d+0) = *(u32 *)(s+0); | 38 if (n & 4) { |
| 39 » » » d += 4; s += 4; | 39 *(u32*)(d + 0) = *(u32*)(s + 0); |
| 40 » » } | 40 d += 4; |
| 41 » » if (n&2) { | 41 s += 4; |
| 42 » » » *d++ = *s++; *d++ = *s++; | 42 } |
| 43 » » } | 43 if (n & 2) { |
| 44 » » if (n&1) { | 44 *d++ = *s++; |
| 45 » » » *d = *s; | 45 *d++ = *s++; |
| 46 » » } | 46 } |
| 47 » » return dest; | 47 if (n & 1) { |
| 48 » } | 48 *d = *s; |
| 49 } |
| 50 return dest; |
| 51 } |
| 49 | 52 |
| 50 » if (n >= 32) switch ((uintptr_t)d % 4) { | 53 if (n >= 32) |
| 51 » case 1: | 54 switch ((uintptr_t)d % 4) { |
| 52 » » w = *(u32 *)s; | 55 case 1: |
| 53 » » *d++ = *s++; | 56 w = *(u32*)s; |
| 54 » » *d++ = *s++; | 57 *d++ = *s++; |
| 55 » » *d++ = *s++; | 58 *d++ = *s++; |
| 56 » » n -= 3; | 59 *d++ = *s++; |
| 57 » » for (; n>=17; s+=16, d+=16, n-=16) { | 60 n -= 3; |
| 58 » » » x = *(u32 *)(s+1); | 61 for (; n >= 17; s += 16, d += 16, n -= 16) { |
| 59 » » » *(u32 *)(d+0) = (w LS 24) | (x RS 8); | 62 x = *(u32*)(s + 1); |
| 60 » » » w = *(u32 *)(s+5); | 63 *(u32*)(d + 0) = (w LS 24) | (x RS 8); |
| 61 » » » *(u32 *)(d+4) = (x LS 24) | (w RS 8); | 64 w = *(u32*)(s + 5); |
| 62 » » » x = *(u32 *)(s+9); | 65 *(u32*)(d + 4) = (x LS 24) | (w RS 8); |
| 63 » » » *(u32 *)(d+8) = (w LS 24) | (x RS 8); | 66 x = *(u32*)(s + 9); |
| 64 » » » w = *(u32 *)(s+13); | 67 *(u32*)(d + 8) = (w LS 24) | (x RS 8); |
| 65 » » » *(u32 *)(d+12) = (x LS 24) | (w RS 8); | 68 w = *(u32*)(s + 13); |
| 66 » » } | 69 *(u32*)(d + 12) = (x LS 24) | (w RS 8); |
| 67 » » break; | 70 } |
| 68 » case 2: | 71 break; |
| 69 » » w = *(u32 *)s; | 72 case 2: |
| 70 » » *d++ = *s++; | 73 w = *(u32*)s; |
| 71 » » *d++ = *s++; | 74 *d++ = *s++; |
| 72 » » n -= 2; | 75 *d++ = *s++; |
| 73 » » for (; n>=18; s+=16, d+=16, n-=16) { | 76 n -= 2; |
| 74 » » » x = *(u32 *)(s+2); | 77 for (; n >= 18; s += 16, d += 16, n -= 16) { |
| 75 » » » *(u32 *)(d+0) = (w LS 16) | (x RS 16); | 78 x = *(u32*)(s + 2); |
| 76 » » » w = *(u32 *)(s+6); | 79 *(u32*)(d + 0) = (w LS 16) | (x RS 16); |
| 77 » » » *(u32 *)(d+4) = (x LS 16) | (w RS 16); | 80 w = *(u32*)(s + 6); |
| 78 » » » x = *(u32 *)(s+10); | 81 *(u32*)(d + 4) = (x LS 16) | (w RS 16); |
| 79 » » » *(u32 *)(d+8) = (w LS 16) | (x RS 16); | 82 x = *(u32*)(s + 10); |
| 80 » » » w = *(u32 *)(s+14); | 83 *(u32*)(d + 8) = (w LS 16) | (x RS 16); |
| 81 » » » *(u32 *)(d+12) = (x LS 16) | (w RS 16); | 84 w = *(u32*)(s + 14); |
| 82 » » } | 85 *(u32*)(d + 12) = (x LS 16) | (w RS 16); |
| 83 » » break; | 86 } |
| 84 » case 3: | 87 break; |
| 85 » » w = *(u32 *)s; | 88 case 3: |
| 86 » » *d++ = *s++; | 89 w = *(u32*)s; |
| 87 » » n -= 1; | 90 *d++ = *s++; |
| 88 » » for (; n>=19; s+=16, d+=16, n-=16) { | 91 n -= 1; |
| 89 » » » x = *(u32 *)(s+3); | 92 for (; n >= 19; s += 16, d += 16, n -= 16) { |
| 90 » » » *(u32 *)(d+0) = (w LS 8) | (x RS 24); | 93 x = *(u32*)(s + 3); |
| 91 » » » w = *(u32 *)(s+7); | 94 *(u32*)(d + 0) = (w LS 8) | (x RS 24); |
| 92 » » » *(u32 *)(d+4) = (x LS 8) | (w RS 24); | 95 w = *(u32*)(s + 7); |
| 93 » » » x = *(u32 *)(s+11); | 96 *(u32*)(d + 4) = (x LS 8) | (w RS 24); |
| 94 » » » *(u32 *)(d+8) = (w LS 8) | (x RS 24); | 97 x = *(u32*)(s + 11); |
| 95 » » » w = *(u32 *)(s+15); | 98 *(u32*)(d + 8) = (w LS 8) | (x RS 24); |
| 96 » » » *(u32 *)(d+12) = (x LS 8) | (w RS 24); | 99 w = *(u32*)(s + 15); |
| 97 » » } | 100 *(u32*)(d + 12) = (x LS 8) | (w RS 24); |
| 98 » » break; | 101 } |
| 99 » } | 102 break; |
| 100 » if (n&16) { | 103 } |
| 101 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 104 if (n & 16) { |
| 102 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 105 *d++ = *s++; |
| 103 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 106 *d++ = *s++; |
| 104 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 107 *d++ = *s++; |
| 105 » } | 108 *d++ = *s++; |
| 106 » if (n&8) { | 109 *d++ = *s++; |
| 107 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 110 *d++ = *s++; |
| 108 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 111 *d++ = *s++; |
| 109 » } | 112 *d++ = *s++; |
| 110 » if (n&4) { | 113 *d++ = *s++; |
| 111 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 114 *d++ = *s++; |
| 112 » } | 115 *d++ = *s++; |
| 113 » if (n&2) { | 116 *d++ = *s++; |
| 114 » » *d++ = *s++; *d++ = *s++; | 117 *d++ = *s++; |
| 115 » } | 118 *d++ = *s++; |
| 116 » if (n&1) { | 119 *d++ = *s++; |
| 117 » » *d = *s; | 120 *d++ = *s++; |
| 118 » } | 121 } |
| 119 » return dest; | 122 if (n & 8) { |
| 123 *d++ = *s++; |
| 124 *d++ = *s++; |
| 125 *d++ = *s++; |
| 126 *d++ = *s++; |
| 127 *d++ = *s++; |
| 128 *d++ = *s++; |
| 129 *d++ = *s++; |
| 130 *d++ = *s++; |
| 131 } |
| 132 if (n & 4) { |
| 133 *d++ = *s++; |
| 134 *d++ = *s++; |
| 135 *d++ = *s++; |
| 136 *d++ = *s++; |
| 137 } |
| 138 if (n & 2) { |
| 139 *d++ = *s++; |
| 140 *d++ = *s++; |
| 141 } |
| 142 if (n & 1) { |
| 143 *d = *s; |
| 144 } |
| 145 return dest; |
| 120 #endif | 146 #endif |
| 121 | 147 |
| 122 » for (; n; n--) *d++ = *s++; | 148 for (; n; n--) |
| 123 » return dest; | 149 *d++ = *s++; |
| 150 return dest; |
| 124 } | 151 } |
| OLD | NEW |