OLD | NEW |
1 #include <string.h> | 1 #include <string.h> |
2 #include <stdint.h> | 2 #include <stdint.h> |
3 #include <endian.h> | 3 #include <endian.h> |
4 | 4 |
5 void *memcpy(void *restrict dest, const void *restrict src, size_t n) | 5 void* memcpy(void* restrict dest, const void* restrict src, size_t n) { |
6 { | 6 unsigned char* d = dest; |
7 » unsigned char *d = dest; | 7 const unsigned char* s = src; |
8 » const unsigned char *s = src; | |
9 | 8 |
10 #ifdef __GNUC__ | 9 #ifdef __GNUC__ |
11 | 10 |
12 #if __BYTE_ORDER == __LITTLE_ENDIAN | 11 #if __BYTE_ORDER == __LITTLE_ENDIAN |
13 #define LS >> | 12 #define LS >> |
14 #define RS << | 13 #define RS << |
15 #else | 14 #else |
16 #define LS << | 15 #define LS << |
17 #define RS >> | 16 #define RS >> |
18 #endif | 17 #endif |
19 | 18 |
20 » typedef uint32_t __attribute__((__may_alias__)) u32; | 19 typedef uint32_t __attribute__((__may_alias__)) u32; |
21 » uint32_t w, x; | 20 uint32_t w, x; |
22 | 21 |
23 » for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++; | 22 for (; (uintptr_t)s % 4 && n; n--) |
| 23 *d++ = *s++; |
24 | 24 |
25 » if ((uintptr_t)d % 4 == 0) { | 25 if ((uintptr_t)d % 4 == 0) { |
26 » » for (; n>=16; s+=16, d+=16, n-=16) { | 26 for (; n >= 16; s += 16, d += 16, n -= 16) { |
27 » » » *(u32 *)(d+0) = *(u32 *)(s+0); | 27 *(u32*)(d + 0) = *(u32*)(s + 0); |
28 » » » *(u32 *)(d+4) = *(u32 *)(s+4); | 28 *(u32*)(d + 4) = *(u32*)(s + 4); |
29 » » » *(u32 *)(d+8) = *(u32 *)(s+8); | 29 *(u32*)(d + 8) = *(u32*)(s + 8); |
30 » » » *(u32 *)(d+12) = *(u32 *)(s+12); | 30 *(u32*)(d + 12) = *(u32*)(s + 12); |
31 » » } | 31 } |
32 » » if (n&8) { | 32 if (n & 8) { |
33 » » » *(u32 *)(d+0) = *(u32 *)(s+0); | 33 *(u32*)(d + 0) = *(u32*)(s + 0); |
34 » » » *(u32 *)(d+4) = *(u32 *)(s+4); | 34 *(u32*)(d + 4) = *(u32*)(s + 4); |
35 » » » d += 8; s += 8; | 35 d += 8; |
36 » » } | 36 s += 8; |
37 » » if (n&4) { | 37 } |
38 » » » *(u32 *)(d+0) = *(u32 *)(s+0); | 38 if (n & 4) { |
39 » » » d += 4; s += 4; | 39 *(u32*)(d + 0) = *(u32*)(s + 0); |
40 » » } | 40 d += 4; |
41 » » if (n&2) { | 41 s += 4; |
42 » » » *d++ = *s++; *d++ = *s++; | 42 } |
43 » » } | 43 if (n & 2) { |
44 » » if (n&1) { | 44 *d++ = *s++; |
45 » » » *d = *s; | 45 *d++ = *s++; |
46 » » } | 46 } |
47 » » return dest; | 47 if (n & 1) { |
48 » } | 48 *d = *s; |
| 49 } |
| 50 return dest; |
| 51 } |
49 | 52 |
50 » if (n >= 32) switch ((uintptr_t)d % 4) { | 53 if (n >= 32) |
51 » case 1: | 54 switch ((uintptr_t)d % 4) { |
52 » » w = *(u32 *)s; | 55 case 1: |
53 » » *d++ = *s++; | 56 w = *(u32*)s; |
54 » » *d++ = *s++; | 57 *d++ = *s++; |
55 » » *d++ = *s++; | 58 *d++ = *s++; |
56 » » n -= 3; | 59 *d++ = *s++; |
57 » » for (; n>=17; s+=16, d+=16, n-=16) { | 60 n -= 3; |
58 » » » x = *(u32 *)(s+1); | 61 for (; n >= 17; s += 16, d += 16, n -= 16) { |
59 » » » *(u32 *)(d+0) = (w LS 24) | (x RS 8); | 62 x = *(u32*)(s + 1); |
60 » » » w = *(u32 *)(s+5); | 63 *(u32*)(d + 0) = (w LS 24) | (x RS 8); |
61 » » » *(u32 *)(d+4) = (x LS 24) | (w RS 8); | 64 w = *(u32*)(s + 5); |
62 » » » x = *(u32 *)(s+9); | 65 *(u32*)(d + 4) = (x LS 24) | (w RS 8); |
63 » » » *(u32 *)(d+8) = (w LS 24) | (x RS 8); | 66 x = *(u32*)(s + 9); |
64 » » » w = *(u32 *)(s+13); | 67 *(u32*)(d + 8) = (w LS 24) | (x RS 8); |
65 » » » *(u32 *)(d+12) = (x LS 24) | (w RS 8); | 68 w = *(u32*)(s + 13); |
66 » » } | 69 *(u32*)(d + 12) = (x LS 24) | (w RS 8); |
67 » » break; | 70 } |
68 » case 2: | 71 break; |
69 » » w = *(u32 *)s; | 72 case 2: |
70 » » *d++ = *s++; | 73 w = *(u32*)s; |
71 » » *d++ = *s++; | 74 *d++ = *s++; |
72 » » n -= 2; | 75 *d++ = *s++; |
73 » » for (; n>=18; s+=16, d+=16, n-=16) { | 76 n -= 2; |
74 » » » x = *(u32 *)(s+2); | 77 for (; n >= 18; s += 16, d += 16, n -= 16) { |
75 » » » *(u32 *)(d+0) = (w LS 16) | (x RS 16); | 78 x = *(u32*)(s + 2); |
76 » » » w = *(u32 *)(s+6); | 79 *(u32*)(d + 0) = (w LS 16) | (x RS 16); |
77 » » » *(u32 *)(d+4) = (x LS 16) | (w RS 16); | 80 w = *(u32*)(s + 6); |
78 » » » x = *(u32 *)(s+10); | 81 *(u32*)(d + 4) = (x LS 16) | (w RS 16); |
79 » » » *(u32 *)(d+8) = (w LS 16) | (x RS 16); | 82 x = *(u32*)(s + 10); |
80 » » » w = *(u32 *)(s+14); | 83 *(u32*)(d + 8) = (w LS 16) | (x RS 16); |
81 » » » *(u32 *)(d+12) = (x LS 16) | (w RS 16); | 84 w = *(u32*)(s + 14); |
82 » » } | 85 *(u32*)(d + 12) = (x LS 16) | (w RS 16); |
83 » » break; | 86 } |
84 » case 3: | 87 break; |
85 » » w = *(u32 *)s; | 88 case 3: |
86 » » *d++ = *s++; | 89 w = *(u32*)s; |
87 » » n -= 1; | 90 *d++ = *s++; |
88 » » for (; n>=19; s+=16, d+=16, n-=16) { | 91 n -= 1; |
89 » » » x = *(u32 *)(s+3); | 92 for (; n >= 19; s += 16, d += 16, n -= 16) { |
90 » » » *(u32 *)(d+0) = (w LS 8) | (x RS 24); | 93 x = *(u32*)(s + 3); |
91 » » » w = *(u32 *)(s+7); | 94 *(u32*)(d + 0) = (w LS 8) | (x RS 24); |
92 » » » *(u32 *)(d+4) = (x LS 8) | (w RS 24); | 95 w = *(u32*)(s + 7); |
93 » » » x = *(u32 *)(s+11); | 96 *(u32*)(d + 4) = (x LS 8) | (w RS 24); |
94 » » » *(u32 *)(d+8) = (w LS 8) | (x RS 24); | 97 x = *(u32*)(s + 11); |
95 » » » w = *(u32 *)(s+15); | 98 *(u32*)(d + 8) = (w LS 8) | (x RS 24); |
96 » » » *(u32 *)(d+12) = (x LS 8) | (w RS 24); | 99 w = *(u32*)(s + 15); |
97 » » } | 100 *(u32*)(d + 12) = (x LS 8) | (w RS 24); |
98 » » break; | 101 } |
99 » } | 102 break; |
100 » if (n&16) { | 103 } |
101 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 104 if (n & 16) { |
102 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 105 *d++ = *s++; |
103 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 106 *d++ = *s++; |
104 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 107 *d++ = *s++; |
105 » } | 108 *d++ = *s++; |
106 » if (n&8) { | 109 *d++ = *s++; |
107 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 110 *d++ = *s++; |
108 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 111 *d++ = *s++; |
109 » } | 112 *d++ = *s++; |
110 » if (n&4) { | 113 *d++ = *s++; |
111 » » *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; | 114 *d++ = *s++; |
112 » } | 115 *d++ = *s++; |
113 » if (n&2) { | 116 *d++ = *s++; |
114 » » *d++ = *s++; *d++ = *s++; | 117 *d++ = *s++; |
115 » } | 118 *d++ = *s++; |
116 » if (n&1) { | 119 *d++ = *s++; |
117 » » *d = *s; | 120 *d++ = *s++; |
118 » } | 121 } |
119 » return dest; | 122 if (n & 8) { |
| 123 *d++ = *s++; |
| 124 *d++ = *s++; |
| 125 *d++ = *s++; |
| 126 *d++ = *s++; |
| 127 *d++ = *s++; |
| 128 *d++ = *s++; |
| 129 *d++ = *s++; |
| 130 *d++ = *s++; |
| 131 } |
| 132 if (n & 4) { |
| 133 *d++ = *s++; |
| 134 *d++ = *s++; |
| 135 *d++ = *s++; |
| 136 *d++ = *s++; |
| 137 } |
| 138 if (n & 2) { |
| 139 *d++ = *s++; |
| 140 *d++ = *s++; |
| 141 } |
| 142 if (n & 1) { |
| 143 *d = *s; |
| 144 } |
| 145 return dest; |
120 #endif | 146 #endif |
121 | 147 |
122 » for (; n; n--) *d++ = *s++; | 148 for (; n; n--) |
123 » return dest; | 149 *d++ = *s++; |
| 150 return dest; |
124 } | 151 } |
OLD | NEW |