OLD | NEW |
| (Empty) |
1 // Copyright 2003, 2004 Colin Percival | |
2 // All rights reserved | |
3 // | |
4 // Redistribution and use in source and binary forms, with or without | |
5 // modification, are permitted providing that the following conditions | |
6 // are met: | |
7 // 1. Redistributions of source code must retain the above copyright | |
8 // notice, this list of conditions and the following disclaimer. | |
9 // 2. Redistributions in binary form must reproduce the above copyright | |
10 // notice, this list of conditions and the following disclaimer in the | |
11 // documentation and/or other materials provided with the distribution. | |
12 // | |
13 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
14 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
15 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
16 // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
17 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
18 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
19 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
20 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
21 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | |
22 // IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
23 // POSSIBILITY OF SUCH DAMAGE. | |
24 // | |
25 // For the terms under which this work may be distributed, please see | |
26 // the adjoining file "LICENSE". | |
27 // | |
28 // ChangeLog: | |
29 // 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | |
30 // values throughout. | |
31 // --Benjamin Smedberg <benjamin@smedbergs.us> | |
32 // 2010-05-26 - Use a paged array for V and I. The address space may be too | |
33 // fragmented for these big arrays to be contiguous. | |
34 // --Stephen Adams <sra@chromium.org> | |
35 // 2015-08-03 - Extract QSufSort to a separate file as template. | |
36 // --Samuel Huang <huangs@chromium.org> | |
37 // 2015-08-19 - Optimize split(), add comments. | |
38 // --Samuel Huang <huangs@chromium.org> | |
39 // 2016-04-27 - Change split() to use Bentley & McIlroy's pivot selection | |
40 // algorithm, which QSufSort originally used. Reference: | |
41 // http://www.larsson.dogma.net/qsufsort.c | |
42 // --Samuel Huang <huangs@chromium.org> | |
43 | |
44 // Copyright 2016 The Chromium Authors. All rights reserved. | |
45 // Use of this source code is governed by a BSD-style license that can be | |
46 // found in the LICENSE file. | |
47 | |
48 #ifndef COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | |
49 #define COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | |
50 | |
51 namespace qsuf { | |
52 | |
53 // ------------------------------------------------------------------------ | |
54 // | |
55 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | |
56 // code formatting and variable names. The changes from the original are: | |
57 // (1) replacing tabs with spaces, | |
58 // (2) indentation and spacing, | |
59 // (3) using 'const', | |
60 // (4) changing the V and I parameters from int* to template <typename T>. | |
61 // (5) optimizing split(); fix styles. | |
62 // (6) moving matchlen() and search() to a separate file. | |
63 // | |
64 // The code appears to be a rewritten version of the suffix array algorithm | |
65 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | |
66 // Sadakane, special cased for bytes. | |
67 | |
68 namespace { | |
69 | |
70 template <typename T> | |
71 T median3(const T& a, const T& b, const T& c) { | |
72 if (a < b) | |
73 return b < c ? b : (a < c ? c : a); | |
74 return b > c ? b : (a > c ? c : a); | |
75 } | |
76 | |
77 } // namespace | |
78 | |
79 template <typename T> | |
80 void split(T I, T V, int start, int end, int h) { | |
81 // For small interval, apply selection sort. | |
82 if (end - start < 16) { | |
83 for (int i = start; i < end;) { | |
84 int skip = 1; | |
85 int best = V[I[i] + h]; | |
86 for (int j = i + 1; j < end; j++) { | |
87 int cur = V[I[j] + h]; | |
88 if (best > cur) { | |
89 best = cur; | |
90 int tmp = I[i]; | |
91 I[i] = I[j]; | |
92 I[j] = tmp; | |
93 skip = 1; | |
94 } else if (best == cur) { | |
95 int tmp = I[i + skip]; | |
96 I[i + skip] = I[j]; | |
97 I[j] = tmp; | |
98 ++skip; | |
99 } | |
100 } | |
101 if (skip == 1) { | |
102 V[I[i]] = i; | |
103 I[i] = -1; | |
104 } else { | |
105 for (int j = i, jend = i + skip; j < jend; j++) | |
106 V[I[j]] = jend - 1; | |
107 } | |
108 i += skip; | |
109 } | |
110 return; | |
111 } | |
112 | |
113 // Select pivot, algorithm by Bentley & McIlroy. | |
114 int n = end - start; | |
115 int mid = start + (n >> 1); | |
116 int pivot = V[I[mid] + h]; | |
117 int p1 = V[I[start] + h]; | |
118 int p2 = V[I[end - 1] + h]; | |
119 if (n > 40) { // Big array: Pseudomedian of 9. | |
120 int s = n >> 3; | |
121 pivot = median3(pivot, V[I[mid - s] + h], V[I[mid + s] + h]); | |
122 p1 = median3(p1, V[I[start + s] + h], V[I[start + s + s] + h]); | |
123 p2 = median3(p2, V[I[end - 1 - s] + h], V[I[end - 1 - s - s] + h]); | |
124 } // Else medium array: Pseudomedian of 3. | |
125 pivot = median3(pivot, p1, p2); | |
126 | |
127 // Split [start, end) into 3 intervals: | |
128 // [start, j) with secondary keys < pivot, | |
129 // [j, k) with secondary keys == pivot, | |
130 // [k, end) with secondary keys > pivot. | |
131 int j = start; | |
132 int k = end; | |
133 for (int i = start; i < k;) { | |
134 int cur = V[I[i] + h]; | |
135 if (cur < pivot) { | |
136 if (i != j) { | |
137 int tmp = I[i]; | |
138 I[i] = I[j]; | |
139 I[j] = tmp; | |
140 } | |
141 ++i; | |
142 ++j; | |
143 } else if (cur > pivot) { | |
144 --k; | |
145 int tmp = I[i]; | |
146 I[i] = I[k]; | |
147 I[k] = tmp; | |
148 } else { | |
149 ++i; | |
150 } | |
151 } | |
152 | |
153 // Recurse on the "< pivot" piece. | |
154 if (start < j) | |
155 split<T>(I, V, start, j, h); | |
156 | |
157 // Update the "== pivot" piece. | |
158 if (j == k - 1) { | |
159 V[I[j]] = j; | |
160 I[j] = -1; | |
161 } else { | |
162 for (int i = j; i < k; ++i) | |
163 V[I[i]] = k - 1; | |
164 } | |
165 | |
166 // Recurse on the "> pivot" piece. | |
167 if (k < end) | |
168 split<T>(I, V, k, end, h); | |
169 } | |
170 | |
171 template <class T> | |
172 static void qsufsort(T I, T V, const unsigned char* old, int oldsize) { | |
173 int buckets[256]; | |
174 int i, h, len; | |
175 | |
176 for (i = 0; i < 256; i++) | |
177 buckets[i] = 0; | |
178 for (i = 0; i < oldsize; i++) | |
179 buckets[old[i]]++; | |
180 for (i = 1; i < 256; i++) | |
181 buckets[i] += buckets[i - 1]; | |
182 for (i = 255; i > 0; i--) | |
183 buckets[i] = buckets[i - 1]; | |
184 buckets[0] = 0; | |
185 | |
186 for (i = 0; i < oldsize; i++) | |
187 I[++buckets[old[i]]] = i; | |
188 I[0] = oldsize; | |
189 for (i = 0; i < oldsize; i++) | |
190 V[i] = buckets[old[i]]; | |
191 V[oldsize] = 0; | |
192 for (i = 1; i < 256; i++) | |
193 if (buckets[i] == buckets[i - 1] + 1) | |
194 I[buckets[i]] = -1; | |
195 I[0] = -1; | |
196 | |
197 for (h = 1; I[0] != -(oldsize + 1); h += h) { | |
198 len = 0; | |
199 for (i = 0; i < oldsize + 1;) { | |
200 if (I[i] < 0) { | |
201 len -= I[i]; | |
202 i -= I[i]; | |
203 } else { | |
204 if (len) | |
205 I[i - len] = -len; | |
206 len = V[I[i]] + 1 - i; | |
207 split<T>(I, V, i, i + len, h); | |
208 i += len; | |
209 len = 0; | |
210 }; | |
211 }; | |
212 if (len) | |
213 I[i - len] = -len; | |
214 }; | |
215 | |
216 for (i = 0; i < oldsize + 1; i++) | |
217 I[V[i]] = i; | |
218 } | |
219 | |
220 // End of 'verbatim' code. | |
221 // ------------------------------------------------------------------------ | |
222 | |
223 } // namespace qsuf | |
224 | |
225 #endif // COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | |
OLD | NEW |