OLD | NEW |
1 #include <string.h> | 1 #include <string.h> |
2 #include <stdint.h> | 2 #include <stdint.h> |
3 | 3 |
4 static char *twobyte_strstr(const unsigned char *h, const unsigned char *n) | 4 static char* twobyte_strstr(const unsigned char* h, const unsigned char* n) { |
5 { | 5 uint16_t nw = n[0] << 8 | n[1], hw = h[0] << 8 | h[1]; |
6 » uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1]; | 6 for (h++; *h && hw != nw; hw = hw << 8 | *++h) |
7 » for (h++; *h && hw != nw; hw = hw<<8 | *++h); | 7 ; |
8 » return *h ? (char *)h-1 : 0; | 8 return *h ? (char*)h - 1 : 0; |
9 } | 9 } |
10 | 10 |
11 static char *threebyte_strstr(const unsigned char *h, const unsigned char *n) | 11 static char* threebyte_strstr(const unsigned char* h, const unsigned char* n) { |
12 { | 12 uint32_t nw = n[0] << 24 | n[1] << 16 | n[2] << 8; |
13 » uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8; | 13 uint32_t hw = h[0] << 24 | h[1] << 16 | h[2] << 8; |
14 » uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8; | 14 for (h += 2; *h && hw != nw; hw = (hw | *++h) << 8) |
15 » for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8); | 15 ; |
16 » return *h ? (char *)h-2 : 0; | 16 return *h ? (char*)h - 2 : 0; |
17 } | 17 } |
18 | 18 |
19 static char *fourbyte_strstr(const unsigned char *h, const unsigned char *n) | 19 static char* fourbyte_strstr(const unsigned char* h, const unsigned char* n) { |
20 { | 20 uint32_t nw = n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3]; |
21 » uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; | 21 uint32_t hw = h[0] << 24 | h[1] << 16 | h[2] << 8 | h[3]; |
22 » uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; | 22 for (h += 3; *h && hw != nw; hw = hw << 8 | *++h) |
23 » for (h+=3; *h && hw != nw; hw = hw<<8 | *++h); | 23 ; |
24 » return *h ? (char *)h-3 : 0; | 24 return *h ? (char*)h - 3 : 0; |
25 } | 25 } |
26 | 26 |
27 #define MAX(a,b) ((a)>(b)?(a):(b)) | 27 #define MAX(a, b) ((a) > (b) ? (a) : (b)) |
28 #define MIN(a,b) ((a)<(b)?(a):(b)) | 28 #define MIN(a, b) ((a) < (b) ? (a) : (b)) |
29 | 29 |
30 #define BITOP(a,b,op) \ | 30 #define BITOP(a, b, op) \ |
31 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a)))) | 31 ((a)[(size_t)(b) / (8 * sizeof *(a))] op(size_t) 1 \ |
| 32 << ((size_t)(b) % (8 * sizeof *(a)))) |
32 | 33 |
33 static char *twoway_strstr(const unsigned char *h, const unsigned char *n) | 34 static char* twoway_strstr(const unsigned char* h, const unsigned char* n) { |
34 { | 35 const unsigned char* z; |
35 » const unsigned char *z; | 36 size_t l, ip, jp, k, p, ms, p0, mem, mem0; |
36 » size_t l, ip, jp, k, p, ms, p0, mem, mem0; | 37 size_t byteset[32 / sizeof(size_t)] = {0}; |
37 » size_t byteset[32 / sizeof(size_t)] = { 0 }; | 38 size_t shift[256]; |
38 » size_t shift[256]; | |
39 | 39 |
40 » /* Computing length of needle and fill shift table */ | 40 /* Computing length of needle and fill shift table */ |
41 » for (l=0; n[l] && h[l]; l++) | 41 for (l = 0; n[l] && h[l]; l++) |
42 » » BITOP(byteset, n[l], |=), shift[n[l]] = l+1; | 42 BITOP(byteset, n[l], |=), shift[n[l]] = l + 1; |
43 » if (n[l]) return 0; /* hit the end of h */ | 43 if (n[l]) |
| 44 return 0; /* hit the end of h */ |
44 | 45 |
45 » /* Compute maximal suffix */ | 46 /* Compute maximal suffix */ |
46 » ip = -1; jp = 0; k = p = 1; | 47 ip = -1; |
47 » while (jp+k<l) { | 48 jp = 0; |
48 » » if (n[ip+k] == n[jp+k]) { | 49 k = p = 1; |
49 » » » if (k == p) { | 50 while (jp + k < l) { |
50 » » » » jp += p; | 51 if (n[ip + k] == n[jp + k]) { |
51 » » » » k = 1; | 52 if (k == p) { |
52 » » » } else k++; | 53 jp += p; |
53 » » } else if (n[ip+k] > n[jp+k]) { | 54 k = 1; |
54 » » » jp += k; | 55 } else |
55 » » » k = 1; | 56 k++; |
56 » » » p = jp - ip; | 57 } else if (n[ip + k] > n[jp + k]) { |
57 » » } else { | 58 jp += k; |
58 » » » ip = jp++; | 59 k = 1; |
59 » » » k = p = 1; | 60 p = jp - ip; |
60 » » } | 61 } else { |
61 » } | 62 ip = jp++; |
62 » ms = ip; | 63 k = p = 1; |
63 » p0 = p; | 64 } |
| 65 } |
| 66 ms = ip; |
| 67 p0 = p; |
64 | 68 |
65 » /* And with the opposite comparison */ | 69 /* And with the opposite comparison */ |
66 » ip = -1; jp = 0; k = p = 1; | 70 ip = -1; |
67 » while (jp+k<l) { | 71 jp = 0; |
68 » » if (n[ip+k] == n[jp+k]) { | 72 k = p = 1; |
69 » » » if (k == p) { | 73 while (jp + k < l) { |
70 » » » » jp += p; | 74 if (n[ip + k] == n[jp + k]) { |
71 » » » » k = 1; | 75 if (k == p) { |
72 » » » } else k++; | 76 jp += p; |
73 » » } else if (n[ip+k] < n[jp+k]) { | 77 k = 1; |
74 » » » jp += k; | 78 } else |
75 » » » k = 1; | 79 k++; |
76 » » » p = jp - ip; | 80 } else if (n[ip + k] < n[jp + k]) { |
77 » » } else { | 81 jp += k; |
78 » » » ip = jp++; | 82 k = 1; |
79 » » » k = p = 1; | 83 p = jp - ip; |
80 » » } | 84 } else { |
81 » } | 85 ip = jp++; |
82 » if (ip+1 > ms+1) ms = ip; | 86 k = p = 1; |
83 » else p = p0; | 87 } |
| 88 } |
| 89 if (ip + 1 > ms + 1) |
| 90 ms = ip; |
| 91 else |
| 92 p = p0; |
84 | 93 |
85 » /* Periodic needle? */ | 94 /* Periodic needle? */ |
86 » if (memcmp(n, n+p, ms+1)) { | 95 if (memcmp(n, n + p, ms + 1)) { |
87 » » mem0 = 0; | 96 mem0 = 0; |
88 » » p = MAX(ms, l-ms-1) + 1; | 97 p = MAX(ms, l - ms - 1) + 1; |
89 » } else mem0 = l-p; | 98 } else |
90 » mem = 0; | 99 mem0 = l - p; |
| 100 mem = 0; |
91 | 101 |
92 » /* Initialize incremental end-of-haystack pointer */ | 102 /* Initialize incremental end-of-haystack pointer */ |
93 » z = h; | 103 z = h; |
94 | 104 |
95 » /* Search loop */ | 105 /* Search loop */ |
96 » for (;;) { | 106 for (;;) { |
97 » » /* Update incremental end-of-haystack pointer */ | 107 /* Update incremental end-of-haystack pointer */ |
98 » » if (z-h < l) { | 108 if (z - h < l) { |
99 » » » /* Fast estimate for MIN(l,63) */ | 109 /* Fast estimate for MIN(l,63) */ |
100 » » » size_t grow = l | 63; | 110 size_t grow = l | 63; |
101 » » » const unsigned char *z2 = memchr(z, 0, grow); | 111 const unsigned char* z2 = memchr(z, 0, grow); |
102 » » » if (z2) { | 112 if (z2) { |
103 » » » » z = z2; | 113 z = z2; |
104 » » » » if (z-h < l) return 0; | 114 if (z - h < l) |
105 » » » } else z += grow; | 115 return 0; |
106 » » } | 116 } else |
| 117 z += grow; |
| 118 } |
107 | 119 |
108 » » /* Check last byte first; advance by shift on mismatch */ | 120 /* Check last byte first; advance by shift on mismatch */ |
109 » » if (BITOP(byteset, h[l-1], &)) { | 121 if (BITOP(byteset, h[l - 1], &)) { |
110 » » » k = l-shift[h[l-1]]; | 122 k = l - shift[h[l - 1]]; |
111 » » » //printf("adv by %zu (on %c) at [%s] (%zu;l=%zu)\n", k,
h[l-1], h, shift[h[l-1]], l); | 123 // printf("adv by %zu (on %c) at [%s] (%zu;l=%zu)\n", k, h[l-1], h, |
112 » » » if (k) { | 124 // shift[h[l-1]], l); |
113 » » » » if (mem0 && mem && k < p) k = l-p; | 125 if (k) { |
114 » » » » h += k; | 126 if (mem0 && mem && k < p) |
115 » » » » mem = 0; | 127 k = l - p; |
116 » » » » continue; | 128 h += k; |
117 » » » } | 129 mem = 0; |
118 » » } else { | 130 continue; |
119 » » » h += l; | 131 } |
120 » » » mem = 0; | 132 } else { |
121 » » » continue; | 133 h += l; |
122 » » } | 134 mem = 0; |
| 135 continue; |
| 136 } |
123 | 137 |
124 » » /* Compare right half */ | 138 /* Compare right half */ |
125 » » for (k=MAX(ms+1,mem); n[k] && n[k] == h[k]; k++); | 139 for (k = MAX(ms + 1, mem); n[k] && n[k] == h[k]; k++) |
126 » » if (n[k]) { | 140 ; |
127 » » » h += k-ms; | 141 if (n[k]) { |
128 » » » mem = 0; | 142 h += k - ms; |
129 » » » continue; | 143 mem = 0; |
130 » » } | 144 continue; |
131 » » /* Compare left half */ | 145 } |
132 » » for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--); | 146 /* Compare left half */ |
133 » » if (k <= mem) return (char *)h; | 147 for (k = ms + 1; k > mem && n[k - 1] == h[k - 1]; k--) |
134 » » h += p; | 148 ; |
135 » » mem = mem0; | 149 if (k <= mem) |
136 » } | 150 return (char*)h; |
| 151 h += p; |
| 152 mem = mem0; |
| 153 } |
137 } | 154 } |
138 | 155 |
139 char *strstr(const char *h, const char *n) | 156 char* strstr(const char* h, const char* n) { |
140 { | 157 /* Return immediately on empty needle */ |
141 » /* Return immediately on empty needle */ | 158 if (!n[0]) |
142 » if (!n[0]) return (char *)h; | 159 return (char*)h; |
143 | 160 |
144 » /* Use faster algorithms for short needles */ | 161 /* Use faster algorithms for short needles */ |
145 » h = strchr(h, *n); | 162 h = strchr(h, *n); |
146 » if (!h || !n[1]) return (char *)h; | 163 if (!h || !n[1]) |
147 » if (!h[1]) return 0; | 164 return (char*)h; |
148 » if (!n[2]) return twobyte_strstr((void *)h, (void *)n); | 165 if (!h[1]) |
149 » if (!h[2]) return 0; | 166 return 0; |
150 » if (!n[3]) return threebyte_strstr((void *)h, (void *)n); | 167 if (!n[2]) |
151 » if (!h[3]) return 0; | 168 return twobyte_strstr((void*)h, (void*)n); |
152 » if (!n[4]) return fourbyte_strstr((void *)h, (void *)n); | 169 if (!h[2]) |
| 170 return 0; |
| 171 if (!n[3]) |
| 172 return threebyte_strstr((void*)h, (void*)n); |
| 173 if (!h[3]) |
| 174 return 0; |
| 175 if (!n[4]) |
| 176 return fourbyte_strstr((void*)h, (void*)n); |
153 | 177 |
154 » return twoway_strstr((void *)h, (void *)n); | 178 return twoway_strstr((void*)h, (void*)n); |
155 } | 179 } |
OLD | NEW |