Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(356)

Side by Side Diff: courgette/third_party/divsufsort/sssort.cc

Issue 1948843002: [Courgette Experimental] Replace QSufSort with libdivsufsort Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Sync and merge. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * sssort.cc for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * For the terms under which this work may be distributed, please see
6 * the adjoining file "LICENSE".
7 *
8 * Changelog:
9 * 2016-06-02 - Initial commit and adaption to use PagedArray.
10 * --Samuel Huang <huangs@chromium.org>
11 */
12
13 #include "courgette/third_party/divsufsort/divsufsort_private.h"
14
15 #if defined(SS_INSERTIONSORT_THRESHOLD)
16 # if SS_INSERTIONSORT_THRESHOLD < 1
17 # undef SS_INSERTIONSORT_THRESHOLD
18 # define SS_INSERTIONSORT_THRESHOLD (1)
19 # endif
20 #else
21 # define SS_INSERTIONSORT_THRESHOLD (8)
22 #endif
23 #if defined(SS_BLOCKSIZE)
24 # if SS_BLOCKSIZE < 0
25 # undef SS_BLOCKSIZE
26 # define SS_BLOCKSIZE (0)
27 # elif 32768 <= SS_BLOCKSIZE
28 # undef SS_BLOCKSIZE
29 # define SS_BLOCKSIZE (32767)
30 # endif
31 #else
32 # define SS_BLOCKSIZE (1024)
33 #endif
34 /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
35 #if SS_BLOCKSIZE == 0
36 # define SS_MISORT_STACKSIZE (64)
37 #elif SS_BLOCKSIZE <= 4096
38 # define SS_MISORT_STACKSIZE (16)
39 #else
40 # define SS_MISORT_STACKSIZE (24)
41 #endif
42 #define SS_SMERGE_STACKSIZE (32)
43
44 #define STACK_PUSH(_a, _b, _c, _d)\
45 do {\
46 assert(ssize < STACK_SIZE);\
47 stack[ssize].a = (_a), stack[ssize].b = (_b),\
48 stack[ssize].c = (_c), stack[ssize++].d = (_d);\
49 } while(0)
50 #define STACK_POP(_a, _b, _c, _d)\
51 do {\
52 assert(0 <= ssize);\
53 if(ssize == 0) { return; }\
54 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
55 (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
56 } while(0)
57
58 namespace divsuf {
59
60 namespace {
61
62 /*- Private Functions -*/
63
64 const saint_t lg_table[256]= {
65 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
66 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
67 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
68 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
69 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
70 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
71 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
72 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
73 };
74
75 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
76
77 inline
78 saint_t
79 ss_ilg(saidx_t n) {
80 #if SS_BLOCKSIZE == 0
81 return (n & 0xffff0000) ?
82 ((n & 0xff000000) ?
83 24 + lg_table[(n >> 24) & 0xff] :
84 16 + lg_table[(n >> 16) & 0xff]) :
85 ((n & 0x0000ff00) ?
86 8 + lg_table[(n >> 8) & 0xff] :
87 0 + lg_table[(n >> 0) & 0xff]);
88 #elif SS_BLOCKSIZE < 256
89 return lg_table[n];
90 #else
91 return (n & 0xff00) ?
92 8 + lg_table[(n >> 8) & 0xff] :
93 0 + lg_table[(n >> 0) & 0xff];
94 #endif
95 }
96
97 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
98
99 #if SS_BLOCKSIZE != 0
100
101 const saint_t sqq_table[256] = {
102 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
103 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
104 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
105 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
106 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
107 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
108 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
109 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
110 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
111 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
112 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
113 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
114 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
115 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
116 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
117 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
118 };
119
120 inline
121 saidx_t
122 ss_isqrt(saidx_t x) {
123 saidx_t y, e;
124
125 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
126 e = (x & 0xffff0000) ?
127 ((x & 0xff000000) ?
128 24 + lg_table[(x >> 24) & 0xff] :
129 16 + lg_table[(x >> 16) & 0xff]) :
130 ((x & 0x0000ff00) ?
131 8 + lg_table[(x >> 8) & 0xff] :
132 0 + lg_table[(x >> 0) & 0xff]);
133
134 if(e >= 16) {
135 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
136 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
137 y = (y + 1 + x / y) >> 1;
138 } else if(e >= 8) {
139 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
140 } else {
141 return sqq_table[x] >> 4;
142 }
143
144 return (x < (y * y)) ? y - 1 : y;
145 }
146
147 #endif /* SS_BLOCKSIZE != 0 */
148
149
150 /*---------------------------------------------------------------------------*/
151
152 /* Compares two suffixes. */
153 inline
154 saint_t
155 ss_compare_internal(const sauchar_t *T,
156 saidx_t p1_lo,
157 saidx_t p1_hi,
158 saidx_t p2_lo,
159 saidx_t p2_hi,
160 saidx_t depth) {
161 const sauchar_t *U1, *U2, *U1n, *U2n;
162
163 for(U1 = T + depth + p1_lo,
164 U2 = T + depth + p2_lo,
165 U1n = T + p1_hi + 2,
166 U2n = T + p2_hi + 2;
167 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
168 ++U1, ++U2) {
169 }
170
171 return U1 < U1n ?
172 (U2 < U2n ? *U1 - *U2 : 1) :
173 (U2 < U2n ? -1 : 0);
174 }
175
176 /* Compares two suffixes. */
177 inline
178 saint_t
179 ss_compare(const sauchar_t *T,
180 const_saidx_it p1, const_saidx_it p2,
181 saidx_t depth) {
182 return ss_compare_internal(T, *p1, *(p1 + 1), *p2, *(p2 + 1), depth);
183 }
184
185
186 /*---------------------------------------------------------------------------*/
187
188 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
189
190 /* Insertionsort for small size groups */
191 void
192 ss_insertionsort(const sauchar_t *T, const_saidx_it PA,
193 saidx_it first, saidx_it last, saidx_t depth) {
194 saidx_it i, j;
195 saidx_t t;
196 saint_t r;
197
198 for(i = last - 2; first <= i; --i) {
199 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
200 do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
201 if(last <= j) { break; }
202 }
203 if(r == 0) { *j = ~*j; }
204 *(j - 1) = t;
205 }
206 }
207
208 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
209
210
211 /*---------------------------------------------------------------------------*/
212
213 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
214
215 inline
216 void
217 ss_fixdown(const sauchar_t *Td, const_saidx_it PA,
218 saidx_it SA, saidx_t i, saidx_t size) {
219 saidx_t j, k;
220 saidx_t v;
221 saint_t c, d, e;
222
223 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
224 d = Td[PA[SA[k = j++]]];
225 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
226 if(d <= c) { break; }
227 }
228 SA[i] = v;
229 }
230
231 /* Simple top-down heapsort. */
232 void
233 ss_heapsort(const sauchar_t *Td, const_saidx_it PA, saidx_it SA, saidx_t size) {
234 saidx_t i, m;
235 saidx_t t;
236
237 m = size;
238 if((size % 2) == 0) {
239 m--;
240 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
241 }
242
243 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
244 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
245 for(i = m - 1; 0 < i; --i) {
246 t = SA[0], SA[0] = SA[i];
247 ss_fixdown(Td, PA, SA, 0, i);
248 SA[i] = t;
249 }
250 }
251
252
253 /*---------------------------------------------------------------------------*/
254
255 /* Returns the median of three elements. */
256 inline
257 saidx_it
258 ss_median3(const sauchar_t *Td, const_saidx_it PA,
259 saidx_it v1, saidx_it v2, saidx_it v3) {
260 saidx_it t;
261 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
262 if(Td[PA[*v2]] > Td[PA[*v3]]) {
263 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
264 else { return v3; }
265 }
266 return v2;
267 }
268
269 /* Returns the median of five elements. */
270 inline
271 saidx_it
272 ss_median5(const sauchar_t *Td, const_saidx_it PA,
273 saidx_it v1, saidx_it v2, saidx_it v3, saidx_it v4, saidx_it v5) {
274 saidx_it t;
275 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
276 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
277 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
278 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
279 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
280 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
281 return v3;
282 }
283
284 /* Returns the pivot element. */
285 inline
286 saidx_it
287 ss_pivot(const sauchar_t *Td, const_saidx_it PA, saidx_it first, saidx_it last) {
288 saidx_it middle;
289 saidx_t t;
290
291 t = last - first;
292 middle = first + t / 2;
293
294 if(t <= 512) {
295 if(t <= 32) {
296 return ss_median3(Td, PA, first, middle, last - 1);
297 } else {
298 t >>= 2;
299 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1 );
300 }
301 }
302 t >>= 3;
303 first = ss_median3(Td, PA, first, first + t, first + (t << 1));
304 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
305 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
306 return ss_median3(Td, PA, first, middle, last);
307 }
308
309
310 /*---------------------------------------------------------------------------*/
311
312 /* Binary partition for substrings. */
313 inline
314 saidx_it
315 ss_partition(const_saidx_it PA,
316 saidx_it first, saidx_it last, saidx_t depth) {
317 saidx_it a, b;
318 saidx_t t;
319 for(a = first - 1, b = last;;) {
320 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
321 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
322 if(b <= a) { break; }
323 t = ~*b;
324 *b = *a;
325 *a = t;
326 }
327 if(first < a) { *first = ~*first; }
328 return a;
329 }
330
331 /* Multikey introsort for medium size groups. */
332 void
333 ss_mintrosort(const sauchar_t *T, const_saidx_it PA,
334 saidx_it first, saidx_it last,
335 saidx_t depth) {
336 #define STACK_SIZE SS_MISORT_STACKSIZE
337 struct { saidx_it a, b; saidx_t c; saint_t d; } stack[STACK_SIZE];
338 const sauchar_t *Td;
339 saidx_it a, b, c, d, e, f;
340 saidx_t s, t;
341 saint_t ssize;
342 saint_t limit;
343 saint_t v, x = 0;
344
345 for(ssize = 0, limit = ss_ilg(last - first);;) {
346
347 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
348 #if 1 < SS_INSERTIONSORT_THRESHOLD
349 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
350 #endif
351 STACK_POP(first, last, depth, limit);
352 continue;
353 }
354
355 Td = T + depth;
356 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
357 if(limit < 0) {
358 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
359 if((x = Td[PA[*a]]) != v) {
360 if(1 < (a - first)) { break; }
361 v = x;
362 first = a;
363 }
364 }
365 if(Td[PA[*first] - 1] < v) {
366 first = ss_partition(PA, first, a, depth);
367 }
368 if((a - first) <= (last - a)) {
369 if(1 < (a - first)) {
370 STACK_PUSH(a, last, depth, -1);
371 last = a, depth += 1, limit = ss_ilg(a - first);
372 } else {
373 first = a, limit = -1;
374 }
375 } else {
376 if(1 < (last - a)) {
377 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
378 first = a, limit = -1;
379 } else {
380 last = a, depth += 1, limit = ss_ilg(a - first);
381 }
382 }
383 continue;
384 }
385
386 /* choose pivot */
387 a = ss_pivot(Td, PA, first, last);
388 v = Td[PA[*a]];
389 SWAP(*first, *a);
390
391 /* partition */
392 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
393 if(((a = b) < last) && (x < v)) {
394 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
395 if(x == v) { SWAP(*b, *a); ++a; }
396 }
397 }
398 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
399 if((b < (d = c)) && (x > v)) {
400 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
401 if(x == v) { SWAP(*c, *d); --d; }
402 }
403 }
404 for(; b < c;) {
405 SWAP(*b, *c);
406 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
407 if(x == v) { SWAP(*b, *a); ++a; }
408 }
409 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
410 if(x == v) { SWAP(*c, *d); --d; }
411 }
412 }
413
414 if(a <= d) {
415 c = b - 1;
416
417 if((s = a - first) > (t = b - a)) { s = t; }
418 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
419 if((s = d - c) > (t = last - d - 1)) { s = t; }
420 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
421
422 a = first + (b - a), c = last - (d - c);
423 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
424
425 if((a - first) <= (last - c)) {
426 if((last - c) <= (c - b)) {
427 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
428 STACK_PUSH(c, last, depth, limit);
429 last = a;
430 } else if((a - first) <= (c - b)) {
431 STACK_PUSH(c, last, depth, limit);
432 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
433 last = a;
434 } else {
435 STACK_PUSH(c, last, depth, limit);
436 STACK_PUSH(first, a, depth, limit);
437 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
438 }
439 } else {
440 if((a - first) <= (c - b)) {
441 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
442 STACK_PUSH(first, a, depth, limit);
443 first = c;
444 } else if((last - c) <= (c - b)) {
445 STACK_PUSH(first, a, depth, limit);
446 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
447 first = c;
448 } else {
449 STACK_PUSH(first, a, depth, limit);
450 STACK_PUSH(c, last, depth, limit);
451 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
452 }
453 }
454 } else {
455 limit += 1;
456 if(Td[PA[*first] - 1] < v) {
457 first = ss_partition(PA, first, last, depth);
458 limit = ss_ilg(last - first);
459 }
460 depth += 1;
461 }
462 }
463 #undef STACK_SIZE
464 }
465
466 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
467
468
469 /*---------------------------------------------------------------------------*/
470
471 #if SS_BLOCKSIZE != 0
472
473 inline
474 void
475 ss_blockswap(saidx_it a, saidx_it b, saidx_t n) {
476 saidx_t t;
477 for(; 0 < n; --n, ++a, ++b) {
478 t = *a, *a = *b, *b = t;
479 }
480 }
481
482 inline
483 void
484 ss_rotate(saidx_it first, saidx_it middle, saidx_it last) {
485 saidx_it a, b;
486 saidx_t t;
487 saidx_t l, r;
488 l = middle - first, r = last - middle;
489 for(; (0 < l) && (0 < r);) {
490 if(l == r) { ss_blockswap(first, middle, l); break; }
491 if(l < r) {
492 a = last - 1, b = middle - 1;
493 t = *a;
494 do {
495 *a-- = *b, *b-- = *a;
496 if(b < first) {
497 *a = t;
498 last = a;
499 if((r -= l + 1) <= l) { break; }
500 a -= 1, b = middle - 1;
501 t = *a;
502 }
503 } while(1);
504 } else {
505 a = first, b = middle;
506 t = *a;
507 do {
508 *a++ = *b, *b++ = *a;
509 if(last <= b) {
510 *a = t;
511 first = a + 1;
512 if((l -= r + 1) <= r) { break; }
513 a += 1, b = middle;
514 t = *a;
515 }
516 } while(1);
517 }
518 }
519 }
520
521
522 /*---------------------------------------------------------------------------*/
523
524 void
525 ss_inplacemerge(const sauchar_t *T, const_saidx_it PA,
526 saidx_it first, saidx_it middle, saidx_it last,
527 saidx_t depth) {
528 const_saidx_it p;
529 saidx_it a, b;
530 saidx_t len, half;
531 saint_t q, r;
532 saint_t x;
533
534 for(;;) {
535 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
536 else { x = 0; p = PA + *(last - 1); }
537 for(a = first, len = middle - first, half = len >> 1, r = -1;
538 0 < len;
539 len = half, half >>= 1) {
540 b = a + half;
541 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
542 if(q < 0) {
543 a = b + 1;
544 half -= (len & 1) ^ 1;
545 } else {
546 r = q;
547 }
548 }
549 if(a < middle) {
550 if(r == 0) { *a = ~*a; }
551 ss_rotate(a, middle, last);
552 last -= middle - a;
553 middle = a;
554 if(first == middle) { break; }
555 }
556 --last;
557 if(x != 0) { while(*--last < 0) { } }
558 if(middle == last) { break; }
559 }
560 }
561
562
563 /*---------------------------------------------------------------------------*/
564
565 /* Merge-forward with internal buffer. */
566 static
567 void
568 ss_mergeforward(const sauchar_t *T, const_saidx_it PA,
569 saidx_it first, saidx_it middle, saidx_it last,
570 saidx_it buf, saidx_t depth) {
571 saidx_it a, b, c, bufend;
572 saidx_t t;
573 saint_t r;
574
575 bufend = buf + (middle - first) - 1;
576 ss_blockswap(buf, first, middle - first);
577
578 for(t = *(a = first), b = buf, c = middle;;) {
579 r = ss_compare(T, PA + *b, PA + *c, depth);
580 if(r < 0) {
581 do {
582 *a++ = *b;
583 if(bufend <= b) { *bufend = t; return; }
584 *b++ = *a;
585 } while(*b < 0);
586 } else if(r > 0) {
587 do {
588 *a++ = *c, *c++ = *a;
589 if(last <= c) {
590 while(b < bufend) { *a++ = *b, *b++ = *a; }
591 *a = *b, *b = t;
592 return;
593 }
594 } while(*c < 0);
595 } else {
596 *c = ~*c;
597 do {
598 *a++ = *b;
599 if(bufend <= b) { *bufend = t; return; }
600 *b++ = *a;
601 } while(*b < 0);
602
603 do {
604 *a++ = *c, *c++ = *a;
605 if(last <= c) {
606 while(b < bufend) { *a++ = *b, *b++ = *a; }
607 *a = *b, *b = t;
608 return;
609 }
610 } while(*c < 0);
611 }
612 }
613 }
614
615 /* Merge-backward with internal buffer. */
616 void
617 ss_mergebackward(const sauchar_t *T, const_saidx_it PA,
618 saidx_it first, saidx_it middle, saidx_it last,
619 saidx_it buf, saidx_t depth) {
620 const_saidx_it p1, p2;
621 saidx_it a, b, c, bufend;
622 saidx_t t;
623 saint_t r;
624 saint_t x;
625
626 bufend = buf + (last - middle) - 1;
627 ss_blockswap(buf, middle, last - middle);
628
629 x = 0;
630 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
631 else { p1 = PA + *bufend; }
632 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
633 else { p2 = PA + *(middle - 1); }
634 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
635 r = ss_compare(T, p1, p2, depth);
636 if(0 < r) {
637 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
638 *a-- = *b;
639 if(b <= buf) { *buf = t; break; }
640 *b-- = *a;
641 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
642 else { p1 = PA + *b; }
643 } else if(r < 0) {
644 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
645 *a-- = *c, *c-- = *a;
646 if(c < first) {
647 while(buf < b) { *a-- = *b, *b-- = *a; }
648 *a = *b, *b = t;
649 break;
650 }
651 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
652 else { p2 = PA + *c; }
653 } else {
654 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
655 *a-- = ~*b;
656 if(b <= buf) { *buf = t; break; }
657 *b-- = *a;
658 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
659 *a-- = *c, *c-- = *a;
660 if(c < first) {
661 while(buf < b) { *a-- = *b, *b-- = *a; }
662 *a = *b, *b = t;
663 break;
664 }
665 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
666 else { p1 = PA + *b; }
667 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
668 else { p2 = PA + *c; }
669 }
670 }
671 }
672
673 /* D&C based merge. */
674 void
675 ss_swapmerge(const sauchar_t *T, const_saidx_it PA,
676 saidx_it first, saidx_it middle, saidx_it last,
677 saidx_it buf, saidx_t bufsize, saidx_t depth) {
678 #define STACK_SIZE SS_SMERGE_STACKSIZE
679 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
680 #define MERGE_CHECK(a, b, c)\
681 do {\
682 if(((c) & 1) ||\
683 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) = = 0))) {\
684 *(a) = ~*(a);\
685 }\
686 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) = = 0))) {\
687 *(b) = ~*(b);\
688 }\
689 } while(0)
690 struct { saidx_it a, b, c; saint_t d; } stack[STACK_SIZE];
691 saidx_it l, r, lm, rm;
692 saidx_t m, len, half;
693 saint_t ssize;
694 saint_t check, next;
695
696 for(check = 0, ssize = 0;;) {
697 if((last - middle) <= bufsize) {
698 if((first < middle) && (middle < last)) {
699 ss_mergebackward(T, PA, first, middle, last, buf, depth);
700 }
701 MERGE_CHECK(first, last, check);
702 STACK_POP(first, middle, last, check);
703 continue;
704 }
705
706 if((middle - first) <= bufsize) {
707 if(first < middle) {
708 ss_mergeforward(T, PA, first, middle, last, buf, depth);
709 }
710 MERGE_CHECK(first, last, check);
711 STACK_POP(first, middle, last, check);
712 continue;
713 }
714
715 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
716 0 < len;
717 len = half, half >>= 1) {
718 if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
719 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
720 m += half + 1;
721 half -= (len & 1) ^ 1;
722 }
723 }
724
725 if(0 < m) {
726 lm = middle - m, rm = middle + m;
727 ss_blockswap(lm, middle, m);
728 l = r = middle, next = 0;
729 if(rm < last) {
730 if(*rm < 0) {
731 *rm = ~*rm;
732 if(first < lm) { for(; *--l < 0;) { } next |= 4; }
733 next |= 1;
734 } else if(first < lm) {
735 for(; *r < 0; ++r) { }
736 next |= 2;
737 }
738 }
739
740 if((l - first) <= (last - r)) {
741 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
742 middle = lm, last = l, check = (check & 3) | (next & 4);
743 } else {
744 if((next & 2) && (r == middle)) { next ^= 6; }
745 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
746 first = r, middle = rm, check = (next & 3) | (check & 4);
747 }
748 } else {
749 if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
750 *middle = ~*middle;
751 }
752 MERGE_CHECK(first, last, check);
753 STACK_POP(first, middle, last, check);
754 }
755 }
756 #undef STACK_SIZE
757 }
758
759 #endif /* SS_BLOCKSIZE != 0 */
760
761 } // namespace
762
763 /*---------------------------------------------------------------------------*/
764
765 /*- Function -*/
766
767 /* Substring sort */
768 void
769 sssort(const sauchar_t *T, const_saidx_it PA,
770 saidx_it first, saidx_it last,
771 saidx_it buf, saidx_t bufsize,
772 saidx_t depth, saidx_t n, saint_t lastsuffix) {
773 saidx_it a;
774 #if SS_BLOCKSIZE != 0
775 saidx_it b, middle, curbuf;
776 saidx_t j, k, curbufsize, limit;
777 #endif
778 saidx_t i;
779
780 if(lastsuffix != 0) { ++first; }
781
782 #if SS_BLOCKSIZE == 0
783 ss_mintrosort(T, PA, first, last, depth);
784 #else
785 if((bufsize < SS_BLOCKSIZE) &&
786 (bufsize < (last - first)) &&
787 (bufsize < (limit = ss_isqrt(last - first)))) {
788 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
789 buf = middle = last - limit, bufsize = limit;
790 } else {
791 middle = last, limit = 0;
792 }
793 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
794 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
795 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
796 #elif 1 < SS_BLOCKSIZE
797 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
798 #endif
799 curbufsize = last - (a + SS_BLOCKSIZE);
800 curbuf = a + SS_BLOCKSIZE;
801 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
802 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
803 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
804 }
805 }
806 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
807 ss_mintrosort(T, PA, a, middle, depth);
808 #elif 1 < SS_BLOCKSIZE
809 ss_insertionsort(T, PA, a, middle, depth);
810 #endif
811 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
812 if(i & 1) {
813 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
814 a -= k;
815 }
816 }
817 if(limit != 0) {
818 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
819 ss_mintrosort(T, PA, middle, last, depth);
820 #elif 1 < SS_BLOCKSIZE
821 ss_insertionsort(T, PA, middle, last, depth);
822 #endif
823 ss_inplacemerge(T, PA, first, middle, last, depth);
824 }
825 #endif
826
827 if(lastsuffix != 0) {
828 /* Insert last type B* suffix. */
829 for(a = first, i = *(first - 1);
830 (a < last) && ((*a < 0) ||
831 (0 < ss_compare_internal(T, PA[*(first - 1)], n - 2,
832 PA[*a], PA[*a + 1], depth)));
833 ++a) {
834 *(a - 1) = *a;
835 }
836 *(a - 1) = i;
837 }
838 }
839
840 } // namespace divsuf
OLDNEW
« no previous file with comments | « courgette/third_party/divsufsort/divsufsort_unittest.cc ('k') | courgette/third_party/divsufsort/trsort.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698