Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(231)

Side by Side Diff: courgette/third_party/divsufsort/divsufsort.cc

Issue 1948843002: [Courgette Experimental] Replace QSufSort with libdivsufsort Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Sync and merge. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * divsufsort.cc for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * For the terms under which this work may be distributed, please see
6 * the adjoining file "LICENSE".
7 *
8 * Changelog:
9 * 2016-06-02 - Initial commit and adaption to use PagedArray.
10 * --Samuel Huang <huangs@chromium.org>
11 */
12
13 #include "courgette/third_party/divsufsort/divsufsort_private.h"
14
15 #include <stdlib.h>
16
/* Number of entries in the per-character bucket array (bucket_A). */
17 #define BUCKET_A_SIZE (ALPHABET_SIZE)
/* Number of entries in the per-character-pair bucket array (bucket_B). */
18 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
19
/* Accessors that expand to an element of the arrays named bucket_A and
   bucket_B in the scope where they are used.

   BUCKET_B and BUCKET_BSTAR address the same bucket_B array with the roles
   of the two characters swapped: BUCKET_B(_c0, _c1) uses _c1 as the row and
   _c0 as the column, while BUCKET_BSTAR(_c0, _c1) uses _c0 as the row and
   _c1 as the column. The uses visible in this file pass _c0 <= _c1 to
   BUCKET_B and _c0 < _c1 to BUCKET_BSTAR, which keeps the two sets of cells
   apart.

   When ALPHABET_SIZE is 256 the cell index is formed with a shift and an OR
   instead of a multiply and an add; for character values below 256 both
   forms select the same cell. */
20 #define BUCKET_A(_c0) bucket_A[(_c0)]
21 #if ALPHABET_SIZE == 256
22 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
23 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
24 #else
25 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
26 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
27 #endif
28
29 namespace divsuf {
30
31 /*- Private Functions -*/
32
33 namespace {
34
35 /* Sorts suffixes of type B*.
 *
 * Classifies every suffix of T[0..n-1] in a right-to-left scan, collects the
 * starting positions of the type B* suffixes, sorts them with the help of
 * sssort and trsort (both defined outside this file), and finally moves them
 * to the slots they occupy inside their two-character buckets of SA.
 *
 * Classification as implemented by the scan below (comparing T[i] with
 * T[i + 1], scanning from the end of T):
 *   - the last suffix is type A, and a suffix stays type A while
 *     T[i] >= T[i + 1] continues a type A run;
 *   - the first position with T[i] < T[i + 1] to the left of a type A run
 *     is type B*;
 *   - positions further left stay type B while T[i] <= T[i + 1].
 *
 * T        - input text of n characters. T[n - 1] is read unconditionally,
 *            so n must be at least 1; the caller in this file passes n >= 3.
 * SA       - working storage indexed from 0 to n - 1.
 * bucket_A - array of BUCKET_A_SIZE entries; overwritten with bucket start
 *            points.
 * bucket_B - array of BUCKET_B_SIZE entries accessed through BUCKET_B and
 *            BUCKET_BSTAR; overwritten with bucket start/end points.
 * n        - length of T.
 *
 * Returns m, the number of type B* suffixes found.
 */
36 saidx_t
37 sort_typeBstar(const sauchar_t *T, saidx_it SA,
38 saidx_t *bucket_A, saidx_t *bucket_B,
39 saidx_t n) {
40 saidx_it PAb, ISAb, buf;
41 saidx_t i, j, k, t, m, bufsize;
42 saint_t c0, c1;
43
44 /* Initialize bucket arrays. */
45 for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
46 for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
47
48 /* Count the number of occurrences of the first one or two characters of each
49 type A, B and B* suffix. Moreover, store the beginning position of all
50 type B* suffixes into the array SA. */
/* Throughout the scan c0 holds T[i] and c1 holds T[i + 1]. m starts at n and
   is decremented once per type B* suffix, so the positions are written from
   SA[n - 1] downwards and end up in increasing text order. */
51 for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
52 /* type A suffix. */
53 do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
54 if(0 <= i) {
55 /* type B* suffix. */
/* Reached only when T[i] < T[i + 1] directly to the left of a type A run. */
56 ++BUCKET_BSTAR(c0, c1);
57 SA[--m] = i;
58 /* type B suffix. */
59 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
60 ++BUCKET_B(c0, c1);
61 }
62 }
63 }
/* Turn the write cursor into the count of type B* suffixes. */
64 m = n - m;
65 /*
66 note:
67 A type B* suffix is lexicographically smaller than a type B suffix that
68 begins with the same first two characters.
69 */
70
71 /* Calculate the index of start/end point of each bucket. */
/* i accumulates the number of type A and type B suffixes seen so far, j the
   number of type B* suffixes. BUCKET_A(c0) therefore becomes the count of all
   suffixes whose first character is smaller than c0, and BUCKET_BSTAR(c0, c1)
   becomes the running total of type B* suffixes up to and including that
   bucket (its end point within the first m slots of SA). */
72 for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
73 t = i + BUCKET_A(c0);
74 BUCKET_A(c0) = i + j; /* start point */
75 i = t + BUCKET_B(c0, c0);
76 for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
77 j += BUCKET_BSTAR(c0, c1);
78 BUCKET_BSTAR(c0, c1) = j; /* end point */
79 i += BUCKET_B(c0, c1);
80 }
81 }
82
83 if(0 < m) {
84 /* Sort the type B* suffixes by their first two characters. */
/* PAb views the m text positions stored at the top of SA; ISAb views the m
   entries that start at SA[m]. The values written into SA[0..m-1] are
   indices into PAb, not text positions. Each write pre-decrements the
   bucket's end point, so afterwards BUCKET_BSTAR holds start points. */
85 PAb = SA + n - m; ISAb = SA + m;
86 for(i = m - 2; 0 <= i; --i) {
87 t = PAb[i], c0 = T[t], c1 = T[t + 1];
88 SA[--BUCKET_BSTAR(c0, c1)] = i;
89 }
/* Index m - 1 is inserted after all others, so it lands in the lowest slot of
   its bucket; the sssort call below tests for it at the bucket's first
   entry. */
90 t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
91 SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
92
93 /* Sort the type B* substrings using sssort. */
/* buf/bufsize describe the SA entries between the m bucket slots and the m
   stored positions (n - 2m entries), handed to sssort as its buffer
   argument. Buckets are visited from the highest character pair downwards;
   [i, j) is the current bucket and only buckets holding more than one entry
   are passed on. The last argument is true when the bucket's first entry is
   index m - 1. NOTE(review): the literal 2 is presumably the comparison
   depth (the first two characters already match) - confirm against sssort. */
94 buf = SA + m, bufsize = n - (2 * m);
95 for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
96 for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
97 i = BUCKET_BSTAR(c0, c1);
98 if(1 < (j - i)) {
99 sssort(T, PAb, SA + i, SA + j,
100 buf, bufsize, 2, n, *(SA + i) == (m - 1));
101 }
102 }
103 }
104
105 /* Compute ranks of type B* substrings. */
/* SA[0..m-1] is walked from the top. NOTE(review): negative entries are
   presumably complemented indices left by sssort to mark substrings that
   compare equal to a neighbour - confirm against sssort. */
106 for(i = m - 1; 0 <= i; --i) {
107 if(0 <= SA[i]) {
/* Run of non-negative entries: each one receives its own slot index as rank,
   then the lowest slot of the run is overwritten with i - j, the negated
   run length. */
108 j = i;
109 do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
110 SA[i + 1] = i - j;
111 if(i <= 0) { break; }
112 }
/* Run of negative entries: each is complemented back in place and given the
   rank j (the highest slot of the run); the first non-negative entry below
   the run receives the same rank. */
113 j = i;
114 do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
115 ISAb[SA[i]] = j;
116 }
117
118 /* Construct the inverse suffix array of type B* suffixes using trsort. */
119 trsort(ISAb, SA, m, 1);
120
121 /* Set the sorted order of type B* suffixes. */
/* T is rescanned with the same classification as above. j counts down from
   m, so ISAb[j] is read for the type B* suffixes in decreasing text order.
   Each position t is written to the slot ISAb names; it is stored
   complemented when t > 0 and the position immediately before it is not
   type B (the inner loop stopped at once, leaving t - i == 1). */
122 for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
123 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
124 if(0 <= i) {
125 t = i;
126 for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
127 SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
128 }
129 }
130
131 /* Calculate the index of start/end point of each bucket. */
/* Buckets are processed from the highest first character downwards. k is the
   read cursor over the sorted type B* entries in SA[0..m-1]; i is the write
   cursor, starting at the last slot before the region of character
   c0 + 1. */
132 BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
133 for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
134 i = BUCKET_A(c0 + 1) - 1;
135 for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
/* Skip the slots reserved for the type B suffixes of (c0, c1): t is where
   the type B* entries of this bucket end. */
136 t = i - BUCKET_B(c0, c1);
137 BUCKET_B(c0, c1) = i; /* end point */
138
139 /* Move all type B* suffixes to the correct position. */
/* Copies SA[j..k] (j being this bucket's start point) downwards from slot t. */
140 for(i = t, j = BUCKET_BSTAR(c0, c1);
141 j <= k;
142 --i, --k) { SA[i] = SA[k]; }
143 }
/* BUCKET_BSTAR(c0, c0 + 1) is reused to record where the scan for first
   character c0 in construct_SA starts. */
144 BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
145 BUCKET_B(c0, c0) = i; /* end point */
146 }
147 }
148
149 return m;
150 }
151
152 /* Constructs the suffix array by using the sorted order of type B* suffixes.
 *
 * Runs two scans over SA. The first (right to left, one pass per first
 * character c1) derives the type B suffixes from the entries already present;
 * the second (left to right) derives the remaining suffixes. A complemented
 * (negative) entry is used as a marker in both scans: such entries are not
 * used to derive a predecessor and are complemented back when visited.
 *
 * T        - input text of n characters. T[n - 2] is read unconditionally, so
 *            n must be at least 2; the caller in this file passes n >= 3.
 * SA       - storage of n entries; in this file it is passed straight from
 *            sort_typeBstar, and the second scan visits every one of its n
 *            slots.
 * bucket_A - per-character positions; in this file filled by sort_typeBstar,
 *            updated here as write cursors advance.
 * bucket_B - per-character-pair positions accessed through BUCKET_B and
 *            BUCKET_BSTAR; in this file filled by sort_typeBstar, updated
 *            here.
 * n        - length of T.
 * m        - number of type B* suffixes; the first scan is skipped when it
 *            is 0.
 */
153 void
154 construct_SA(const sauchar_t *T, saidx_it SA,
155 saidx_t *bucket_A, saidx_t *bucket_B,
156 saidx_t n, saidx_t m) {
157 saidx_it i, j, k;
158 saidx_t s;
159 saint_t c0, c1, c2;
160
161 if(0 < m) {
162 /* Construct the sorted order of type B suffixes by using
163 the sorted order of type B* suffixes. */
164 for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
165 /* Scan the suffix array from right to left. */
/* j walks down from the last slot before the region of character c1 + 1 to
   i, the start point stored in BUCKET_BSTAR(c1, c1 + 1). k is the write
   cursor of the (c2, c1) bucket currently being filled; c2 == -1 means no
   bucket has been selected yet. */
166 for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
167 j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
168 i <= j;
169 --j) {
170 if(0 < (s = *j)) {
171 assert(T[s] == c1);
172 assert(((s + 1) < n) && (T[s] <= T[s + 1]));
173 assert(T[s - 1] <= T[s]);
/* Mark this entry as processed, then step s to the preceding text position
   and take its first character. */
174 *j = ~s;
175 c0 = T[--s];
/* Store the predecessor complemented when the character before it is larger,
   so that this scan does not derive anything from it. */
176 if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
177 if(c0 != c2) {
/* Switching buckets: save the cursor of the previous bucket and load the
   cursor for (c0, c1). */
178 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
179 k = SA + BUCKET_B(c2 = c0, c1);
180 }
181 assert(k < j);
/* Buckets are filled from their end point downwards. */
182 *k-- = s;
183 } else {
184 assert(((s == 0) && (T[s] == c1)) || (s < 0));
/* Zero or marked entry: only flip the marker. */
185 *j = ~s;
186 }
187 }
188 }
189 }
190
191 /* Construct the suffix array by using
192 the sorted order of type B suffixes. */
/* Seed the scan with the last suffix, n - 1, at the start of the bucket of
   its first character; it is stored complemented when T[n - 2] is smaller
   than that character. */
193 k = SA + BUCKET_A(c2 = T[n - 1]);
194 *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
195 /* Scan the suffix array from left to right. */
196 for(i = SA, j = SA + n; i < j; ++i) {
197 if(0 < (s = *i)) {
198 assert(T[s - 1] >= T[s]);
/* Step s to the preceding text position and take its first character. */
199 c0 = T[--s];
/* Store the predecessor complemented when it is the first text position or
   the character before it is smaller; it is flipped back when the scan
   reaches it. */
200 if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
201 if(c0 != c2) {
/* Switching buckets: save the cursor of the previous bucket and load the
   cursor for c0. */
202 BUCKET_A(c2) = k - SA;
203 k = SA + BUCKET_A(c2 = c0);
204 }
205 assert(i < k);
/* Buckets are filled from their start point upwards. */
206 *k++ = s;
207 } else {
208 assert(s < 0);
/* Marked entry: restore the plain text position. */
209 *i = ~s;
210 }
211 }
212 }
213
214 } // namespace
215
216 /*---------------------------------------------------------------------------*/
217
218 /*- Function -*/
219
220 saint_t
221 divsufsort(const sauchar_t *T, saidx_it SA, saidx_t n) {
222 saidx_t *bucket_A, *bucket_B;
223 saidx_t m;
224 saint_t err = 0;
225
226 /* Check arguments. */
227 if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
228 else if(n == 0) { return 0; }
229 else if(n == 1) { SA[0] = 0; return 0; }
230 else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
231
232 bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
233 bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
234
235 /* Suffixsort. */
236 if((bucket_A != NULL) && (bucket_B != NULL)) {
237 m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
238 construct_SA(T, SA, bucket_A, bucket_B, n, m);
239 } else {
240 err = -2;
241 }
242
243 free(bucket_B);
244 free(bucket_A);
245
246 return err;
247 }
248
249 } // namespace divsuf
OLDNEW
« no previous file with comments | « courgette/third_party/divsufsort/divsufsort.h ('k') | courgette/third_party/divsufsort/divsufsort_private.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698