| OLD | NEW |
| 1 /* | 1 /* |
| 2 qsufsort.h -- Suffix array generation. | 2 qsufsort.h -- Suffix array generation. |
| 3 | 3 |
| 4 Copyright 2003 Colin Percival | 4 Copyright 2003 Colin Percival |
| 5 | 5 |
| 6 For the terms under which this work may be distributed, please see | 6 For the terms under which this work may be distributed, please see |
| 7 the adjoining file "LICENSE". | 7 the adjoining file "LICENSE". |
| 8 | 8 |
| 9 ChangeLog: | 9 ChangeLog: |
| 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
| 11 values throughout. | 11 values throughout. |
| 12 --Benjamin Smedberg <benjamin@smedbergs.us> | 12 --Benjamin Smedberg <benjamin@smedbergs.us> |
| 13 2010-05-26 - Use a paged array for V and I. The address space may be too | 13 2010-05-26 - Use a paged array for V and I. The address space may be too |
| 14 fragmented for these big arrays to be contiguous. | 14 fragmented for these big arrays to be contiguous. |
| 15 --Stephen Adams <sra@chromium.org> | 15 --Stephen Adams <sra@chromium.org> |
| 16 2015-08-03 - Extract QSufSort to a separate file as template. | 16 2015-08-03 - Extract QSufSort to a separate file as template. |
| 17 --Samuel Huang <huangs@chromium.org> | 17 --Samuel Huang <huangs@chromium.org> |
| 18 2015-08-19 - Optimize split() and search(), add comments. | 18 2015-08-19 - Optimize split(), add comments. |
| 19 --Samuel Huang <huangs@chromium.org> | 19 --Samuel Huang <huangs@chromium.org> |
| 20 2016-04-27 - Change split() to use Bentley & McIlroy's pivot selection | 20 2016-04-27 - Change split() to use Bentley & McIlroy's pivot selection |
| 21 algorithm, which QSufSort originally used. Reference: | 21 algorithm, which QSufSort originally used. Reference: |
| 22 http://www.larsson.dogma.net/qsufsort.c | 22 http://www.larsson.dogma.net/qsufsort.c |
| 23 --Samuel Huang <huangs@chromium.org> | 23 --Samuel Huang <huangs@chromium.org> |
| 24 */ | 24 */ |
| 25 | 25 |
| 26 #ifndef COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | 26 #ifndef COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ |
| 27 #define COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | 27 #define COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ |
| 28 | 28 |
| 29 #include <algorithm> | |
| 30 #include <cstring> | |
| 31 | |
| 32 namespace courgette { | |
| 33 namespace qsuf { | 29 namespace qsuf { |
| 34 | 30 |
| 35 // ------------------------------------------------------------------------ | 31 // ------------------------------------------------------------------------ |
| 36 // | 32 // |
| 37 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | 33 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the |
| 38 // code formatting and variable names. The changes from the original are: | 34 // code formatting and variable names. The changes from the original are: |
| 39 // (1) replacing tabs with spaces, | 35 // (1) replacing tabs with spaces, |
| 40 // (2) indentation and spacing, | 36 // (2) indentation and spacing, |
| 41 // (3) using 'const', | 37 // (3) using 'const', |
| 42 // (4) changing the V and I parameters from int* to template <typename T>. | 38 // (4) changing the V and I parameters from int* to template <typename T>. |
| 43 // (5) optimizing split() and search(); fix styles. | 39 // (5) optimizing split(); fix styles. |
| 40 // (6) moving matchlen() and search() to a separate file. |
| 44 // | 41 // |
| 45 // The code appears to be a rewritten version of the suffix array algorithm | 42 // The code appears to be a rewritten version of the suffix array algorithm |
| 46 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | 43 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko |
| 47 // Sadakane, special cased for bytes. | 44 // Sadakane, special cased for bytes. |
| 48 | 45 |
| 49 namespace { | 46 namespace { |
| 50 | 47 |
| 51 template <typename T> | 48 template <typename T> |
| 52 T median3(const T& a, const T& b, const T& c) { | 49 T median3(const T& a, const T& b, const T& c) { |
| 53 if (a < b) | 50 if (a < b) |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 191 }; | 188 }; |
| 192 }; | 189 }; |
| 193 if (len) | 190 if (len) |
| 194 I[i - len] = -len; | 191 I[i - len] = -len; |
| 195 }; | 192 }; |
| 196 | 193 |
| 197 for (i = 0; i < oldsize + 1; i++) | 194 for (i = 0; i < oldsize + 1; i++) |
| 198 I[V[i]] = i; | 195 I[V[i]] = i; |
| 199 } | 196 } |
| 200 | 197 |
| 201 static int matchlen(const unsigned char* old, | |
| 202 int oldsize, | |
| 203 const unsigned char* newbuf, | |
| 204 int newsize) { | |
| 205 int i; | |
| 206 | |
| 207 for (i = 0; (i < oldsize) && (i < newsize); i++) | |
| 208 if (old[i] != newbuf[i]) | |
| 209 break; | |
| 210 | |
| 211 return i; | |
| 212 } | |
| 213 | |
| 214 template <class T> | |
| 215 static int search(T I, | |
| 216 const unsigned char* old, | |
| 217 int oldsize, | |
| 218 const unsigned char* newbuf, | |
| 219 int newsize, | |
| 220 int* pos) { | |
| 221 int lo = 0; | |
| 222 int hi = oldsize; | |
| 223 while (hi - lo >= 2) { | |
| 224 int mid = (lo + hi) / 2; | |
| 225 if (memcmp(old + I[mid], newbuf, std::min(oldsize - I[mid], newsize)) < 0) { | |
| 226 lo = mid; | |
| 227 } else { | |
| 228 hi = mid; | |
| 229 } | |
| 230 } | |
| 231 | |
| 232 int x = matchlen(old + I[lo], oldsize - I[lo], newbuf, newsize); | |
| 233 int y = matchlen(old + I[hi], oldsize - I[hi], newbuf, newsize); | |
| 234 if (x > y) { | |
| 235 *pos = I[lo]; | |
| 236 return x; | |
| 237 } | |
| 238 *pos = I[hi]; | |
| 239 return y; | |
| 240 } | |
| 241 | |
| 242 // End of 'verbatim' code. | 198 // End of 'verbatim' code. |
| 243 // ------------------------------------------------------------------------ | 199 // ------------------------------------------------------------------------ |
| 244 | 200 |
| 245 } // namespace qsuf | 201 } // namespace qsuf |
| 246 } // namespace courgette | 202 |
| 247 #endif // COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | 203 #endif // COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ |
| OLD | NEW |