| OLD | NEW |
| (Empty) |
| 1 // Copyright 2003, 2004 Colin Percival | |
| 2 // All rights reserved | |
| 3 // | |
| 4 // Redistribution and use in source and binary forms, with or without | |
| 5 // modification, are permitted providing that the following conditions | |
| 6 // are met: | |
| 7 // 1. Redistributions of source code must retain the above copyright | |
| 8 // notice, this list of conditions and the following disclaimer. | |
| 9 // 2. Redistributions in binary form must reproduce the above copyright | |
| 10 // notice, this list of conditions and the following disclaimer in the | |
| 11 // documentation and/or other materials provided with the distribution. | |
| 12 // | |
| 13 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
| 14 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 15 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 16 // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
| 17 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 18 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 19 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 20 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 21 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | |
| 22 // IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 23 // POSSIBILITY OF SUCH DAMAGE. | |
| 24 // | |
| 25 // For the terms under which this work may be distributed, please see | |
| 26 // the adjoining file "LICENSE". | |
| 27 // | |
| 28 // ChangeLog: | |
| 29 // 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | |
| 30 // values throughout. | |
| 31 // --Benjamin Smedberg <benjamin@smedbergs.us> | |
| 32 // 2010-05-26 - Use a paged array for V and I. The address space may be too | |
| 33 // fragmented for these big arrays to be contiguous. | |
| 34 // --Stephen Adams <sra@chromium.org> | |
| 35 // 2015-08-03 - Extract QSufSort to a separate file as template. | |
| 36 // --Samuel Huang <huangs@chromium.org> | |
| 37 // 2015-08-19 - Optimize split(), add comments. | |
| 38 // --Samuel Huang <huangs@chromium.org> | |
| 39 // 2016-04-27 - Change split() to use Bentley & McIlroy's pivot selection | |
| 40 // algorithm, which QSufSort originally used. Reference: | |
| 41 // http://www.larsson.dogma.net/qsufsort.c | |
| 42 // --Samuel Huang <huangs@chromium.org> | |
| 43 | |
| 44 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 45 // Use of this source code is governed by a BSD-style license that can be | |
| 46 // found in the LICENSE file. | |
| 47 | |
| 48 #ifndef COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | |
| 49 #define COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | |
| 50 | |
| 51 namespace qsuf { | |
| 52 | |
| 53 // ------------------------------------------------------------------------ | |
| 54 // | |
| 55 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | |
| 56 // code formatting and variable names. The changes from the original are: | |
| 57 // (1) replacing tabs with spaces, | |
| 58 // (2) indentation and spacing, | |
| 59 // (3) using 'const', | |
| 60 // (4) changing the V and I parameters from int* to template <typename T>. | |
| 61 // (5) optimizing split(); fix styles. | |
| 62 // (6) moving matchlen() and search() to a separate file. | |
| 63 // | |
| 64 // The code appears to be a rewritten version of the suffix array algorithm | |
| 65 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | |
| 66 // Sadakane, special cased for bytes. | |
| 67 | |
| 68 namespace { | |
| 69 | |
| 70 template <typename T> | |
| 71 T median3(const T& a, const T& b, const T& c) { | |
| 72 if (a < b) | |
| 73 return b < c ? b : (a < c ? c : a); | |
| 74 return b > c ? b : (a > c ? c : a); | |
| 75 } | |
| 76 | |
| 77 } // namespace | |
| 78 | |
| 79 template <typename T> | |
| 80 void split(T I, T V, int start, int end, int h) { | |
| 81 // For small interval, apply selection sort. | |
| 82 if (end - start < 16) { | |
| 83 for (int i = start; i < end;) { | |
| 84 int skip = 1; | |
| 85 int best = V[I[i] + h]; | |
| 86 for (int j = i + 1; j < end; j++) { | |
| 87 int cur = V[I[j] + h]; | |
| 88 if (best > cur) { | |
| 89 best = cur; | |
| 90 int tmp = I[i]; | |
| 91 I[i] = I[j]; | |
| 92 I[j] = tmp; | |
| 93 skip = 1; | |
| 94 } else if (best == cur) { | |
| 95 int tmp = I[i + skip]; | |
| 96 I[i + skip] = I[j]; | |
| 97 I[j] = tmp; | |
| 98 ++skip; | |
| 99 } | |
| 100 } | |
| 101 if (skip == 1) { | |
| 102 V[I[i]] = i; | |
| 103 I[i] = -1; | |
| 104 } else { | |
| 105 for (int j = i, jend = i + skip; j < jend; j++) | |
| 106 V[I[j]] = jend - 1; | |
| 107 } | |
| 108 i += skip; | |
| 109 } | |
| 110 return; | |
| 111 } | |
| 112 | |
| 113 // Select pivot, algorithm by Bentley & McIlroy. | |
| 114 int n = end - start; | |
| 115 int mid = start + (n >> 1); | |
| 116 int pivot = V[I[mid] + h]; | |
| 117 int p1 = V[I[start] + h]; | |
| 118 int p2 = V[I[end - 1] + h]; | |
| 119 if (n > 40) { // Big array: Pseudomedian of 9. | |
| 120 int s = n >> 3; | |
| 121 pivot = median3(pivot, V[I[mid - s] + h], V[I[mid + s] + h]); | |
| 122 p1 = median3(p1, V[I[start + s] + h], V[I[start + s + s] + h]); | |
| 123 p2 = median3(p2, V[I[end - 1 - s] + h], V[I[end - 1 - s - s] + h]); | |
| 124 } // Else medium array: Pseudomedian of 3. | |
| 125 pivot = median3(pivot, p1, p2); | |
| 126 | |
| 127 // Split [start, end) into 3 intervals: | |
| 128 // [start, j) with secondary keys < pivot, | |
| 129 // [j, k) with secondary keys == pivot, | |
| 130 // [k, end) with secondary keys > pivot. | |
| 131 int j = start; | |
| 132 int k = end; | |
| 133 for (int i = start; i < k;) { | |
| 134 int cur = V[I[i] + h]; | |
| 135 if (cur < pivot) { | |
| 136 if (i != j) { | |
| 137 int tmp = I[i]; | |
| 138 I[i] = I[j]; | |
| 139 I[j] = tmp; | |
| 140 } | |
| 141 ++i; | |
| 142 ++j; | |
| 143 } else if (cur > pivot) { | |
| 144 --k; | |
| 145 int tmp = I[i]; | |
| 146 I[i] = I[k]; | |
| 147 I[k] = tmp; | |
| 148 } else { | |
| 149 ++i; | |
| 150 } | |
| 151 } | |
| 152 | |
| 153 // Recurse on the "< pivot" piece. | |
| 154 if (start < j) | |
| 155 split<T>(I, V, start, j, h); | |
| 156 | |
| 157 // Update the "== pivot" piece. | |
| 158 if (j == k - 1) { | |
| 159 V[I[j]] = j; | |
| 160 I[j] = -1; | |
| 161 } else { | |
| 162 for (int i = j; i < k; ++i) | |
| 163 V[I[i]] = k - 1; | |
| 164 } | |
| 165 | |
| 166 // Recurse on the "> pivot" piece. | |
| 167 if (k < end) | |
| 168 split<T>(I, V, k, end, h); | |
| 169 } | |
| 170 | |
| 171 template <class T> | |
| 172 static void qsufsort(T I, T V, const unsigned char* old, int oldsize) { | |
| 173 int buckets[256]; | |
| 174 int i, h, len; | |
| 175 | |
| 176 for (i = 0; i < 256; i++) | |
| 177 buckets[i] = 0; | |
| 178 for (i = 0; i < oldsize; i++) | |
| 179 buckets[old[i]]++; | |
| 180 for (i = 1; i < 256; i++) | |
| 181 buckets[i] += buckets[i - 1]; | |
| 182 for (i = 255; i > 0; i--) | |
| 183 buckets[i] = buckets[i - 1]; | |
| 184 buckets[0] = 0; | |
| 185 | |
| 186 for (i = 0; i < oldsize; i++) | |
| 187 I[++buckets[old[i]]] = i; | |
| 188 I[0] = oldsize; | |
| 189 for (i = 0; i < oldsize; i++) | |
| 190 V[i] = buckets[old[i]]; | |
| 191 V[oldsize] = 0; | |
| 192 for (i = 1; i < 256; i++) | |
| 193 if (buckets[i] == buckets[i - 1] + 1) | |
| 194 I[buckets[i]] = -1; | |
| 195 I[0] = -1; | |
| 196 | |
| 197 for (h = 1; I[0] != -(oldsize + 1); h += h) { | |
| 198 len = 0; | |
| 199 for (i = 0; i < oldsize + 1;) { | |
| 200 if (I[i] < 0) { | |
| 201 len -= I[i]; | |
| 202 i -= I[i]; | |
| 203 } else { | |
| 204 if (len) | |
| 205 I[i - len] = -len; | |
| 206 len = V[I[i]] + 1 - i; | |
| 207 split<T>(I, V, i, i + len, h); | |
| 208 i += len; | |
| 209 len = 0; | |
| 210 }; | |
| 211 }; | |
| 212 if (len) | |
| 213 I[i - len] = -len; | |
| 214 }; | |
| 215 | |
| 216 for (i = 0; i < oldsize + 1; i++) | |
| 217 I[V[i]] = i; | |
| 218 } | |
| 219 | |
| 220 // End of 'verbatim' code. | |
| 221 // ------------------------------------------------------------------------ | |
| 222 | |
| 223 } // namespace qsuf | |
| 224 | |
| 225 #endif // COURGETTE_THIRD_PARTY_BSDIFF_QSUFSORT_H_ | |
| OLD | NEW |