Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: courgette/third_party/divsufsort/trsort.cc

Issue 1948843002: [Courgette Experimental] Replace QSufSort with libdivsufsort Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Sync and merge. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « courgette/third_party/divsufsort/sssort.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * trsort.cc for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * For the terms under which this work may be distributed, please see
6 * the adjoining file "LICENSE".
7 *
8 * Changelog:
9 * 2016-06-02 - Initial commit and adaption to use PagedArray.
10 * --Samuel Huang <huangs@chromium.org>
11 */
12
13 #include "courgette/third_party/divsufsort/divsufsort_private.h"
14
15 #define TR_INSERTIONSORT_THRESHOLD (8)
16 #define TR_STACKSIZE (64)
17
18 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
19 do {\
20 assert(ssize < STACK_SIZE);\
21 stack[ssize].a = (_a), stack[ssize].b = (_b),\
22 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
23 } while(0)
24 #define STACK_POP5(_a, _b, _c, _d, _e)\
25 do {\
26 assert(0 <= ssize);\
27 if(ssize == 0) { return; }\
28 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
29 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
30 } while(0)
31
32
33 namespace divsuf {
34
35 namespace {
36
37 /*- Private Functions -*/
38
39 const saint_t lg_table[256]= {
40 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
41 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
42 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
43 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
44 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
45 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
46 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
47 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
48 };
49
50 inline
51 saint_t
52 tr_ilg(saidx_t n) {
53 return (n & 0xffff0000) ?
54 ((n & 0xff000000) ?
55 24 + lg_table[(n >> 24) & 0xff] :
56 16 + lg_table[(n >> 16) & 0xff]) :
57 ((n & 0x0000ff00) ?
58 8 + lg_table[(n >> 8) & 0xff] :
59 0 + lg_table[(n >> 0) & 0xff]);
60 }
61
62
63 /*---------------------------------------------------------------------------*/
64
65 /* Simple insertionsort for small size groups. */
66 void
67 tr_insertionsort(const_saidx_it ISAd, saidx_it first, saidx_it last) {
68 saidx_it a, b;
69 saidx_t t, r;
70
71 for(a = first + 1; a < last; ++a) {
72 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
73 do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
74 if(b < first) { break; }
75 }
76 if(r == 0) { *b = ~*b; }
77 *(b + 1) = t;
78 }
79 }
80
81
82 /*---------------------------------------------------------------------------*/
83
84 inline
85 void
86 tr_fixdown(const_saidx_it ISAd, saidx_it SA, saidx_t i, saidx_t size) {
87 saidx_t j, k;
88 saidx_t v;
89 saidx_t c, d, e;
90
91 for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
92 d = ISAd[SA[k = j++]];
93 if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
94 if(d <= c) { break; }
95 }
96 SA[i] = v;
97 }
98
99 /* Simple top-down heapsort. */
100 void
101 tr_heapsort(const_saidx_it ISAd, saidx_it SA, saidx_t size) {
102 saidx_t i, m;
103 saidx_t t;
104
105 m = size;
106 if((size % 2) == 0) {
107 m--;
108 if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
109 }
110
111 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
112 if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
113 for(i = m - 1; 0 < i; --i) {
114 t = SA[0], SA[0] = SA[i];
115 tr_fixdown(ISAd, SA, 0, i);
116 SA[i] = t;
117 }
118 }
119
120
121 /*---------------------------------------------------------------------------*/
122
123 /* Returns the median of three elements. */
124 inline
125 saidx_it
126 tr_median3(const_saidx_it ISAd, saidx_it v1, saidx_it v2, saidx_it v3) {
127 saidx_it t;
128 if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
129 if(ISAd[*v2] > ISAd[*v3]) {
130 if(ISAd[*v1] > ISAd[*v3]) { return v1; }
131 else { return v3; }
132 }
133 return v2;
134 }
135
136 /* Returns the median of five elements. */
137 inline
138 saidx_it
139 tr_median5(const_saidx_it ISAd,
140 saidx_it v1, saidx_it v2, saidx_it v3, saidx_it v4, saidx_it v5) {
141 saidx_it t;
142 if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
143 if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
144 if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
145 if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
146 if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
147 if(ISAd[*v3] > ISAd[*v4]) { return v4; }
148 return v3;
149 }
150
151 /* Returns the pivot element. */
152 inline
153 saidx_it
154 tr_pivot(const_saidx_it ISAd, saidx_it first, saidx_it last) {
155 saidx_it middle;
156 saidx_t t;
157
158 t = last - first;
159 middle = first + t / 2;
160
161 if(t <= 512) {
162 if(t <= 32) {
163 return tr_median3(ISAd, first, middle, last - 1);
164 } else {
165 t >>= 2;
166 return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
167 }
168 }
169 t >>= 3;
170 first = tr_median3(ISAd, first, first + t, first + (t << 1));
171 middle = tr_median3(ISAd, middle - t, middle, middle + t);
172 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
173 return tr_median3(ISAd, first, middle, last);
174 }
175
176
177 /*---------------------------------------------------------------------------*/
178
179 typedef struct _trbudget_t trbudget_t;
180 struct _trbudget_t {
181 saidx_t chance;
182 saidx_t remain;
183 saidx_t incval;
184 saidx_t count;
185 };
186
187 inline
188 void
189 trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
190 budget->chance = chance;
191 budget->remain = budget->incval = incval;
192 }
193
194 inline
195 saint_t
196 trbudget_check(trbudget_t *budget, saidx_t size) {
197 if(size <= budget->remain) { budget->remain -= size; return 1; }
198 if(budget->chance == 0) { budget->count += size; return 0; }
199 budget->remain += budget->incval - size;
200 budget->chance -= 1;
201 return 1;
202 }
203
204
205 /*---------------------------------------------------------------------------*/
206
207 inline
208 void
209 tr_partition(const_saidx_it ISAd,
210 saidx_it first, saidx_it middle, saidx_it last,
211 saidx_it* pa, saidx_it* pb, saidx_t v) {
212 saidx_it a, b, c, d, e, f;
213 saidx_t t, s;
214 saidx_t x = 0;
215
216 for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
217 if(((a = b) < last) && (x < v)) {
218 for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
219 if(x == v) { SWAP(*b, *a); ++a; }
220 }
221 }
222 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
223 if((b < (d = c)) && (x > v)) {
224 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
225 if(x == v) { SWAP(*c, *d); --d; }
226 }
227 }
228 for(; b < c;) {
229 SWAP(*b, *c);
230 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
231 if(x == v) { SWAP(*b, *a); ++a; }
232 }
233 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
234 if(x == v) { SWAP(*c, *d); --d; }
235 }
236 }
237
238 if(a <= d) {
239 c = b - 1;
240 if((s = a - first) > (t = b - a)) { s = t; }
241 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
242 if((s = d - c) > (t = last - d - 1)) { s = t; }
243 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
244 first += (b - a), last -= (d - c);
245 }
246 *pa = first, *pb = last;
247 }
248
249 void
250 tr_copy(saidx_it ISA, const_saidx_it SA,
251 saidx_it first, saidx_it a, saidx_it b, saidx_it last,
252 saidx_t depth) {
253 /* sort suffixes of middle partition
254 by using sorted order of suffixes of left and right partition. */
255 saidx_it c, d, e;
256 saidx_t s, v;
257
258 v = b - SA - 1;
259 for(c = first, d = a - 1; c <= d; ++c) {
260 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
261 *++d = s;
262 ISA[s] = d - SA;
263 }
264 }
265 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
266 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
267 *--d = s;
268 ISA[s] = d - SA;
269 }
270 }
271 }
272
273 void
274 tr_partialcopy(saidx_it ISA, const_saidx_it SA,
275 saidx_it first, saidx_it a, saidx_it b, saidx_it last,
276 saidx_t depth) {
277 saidx_it c, d, e;
278 saidx_t s, v;
279 saidx_t rank, lastrank, newrank = -1;
280
281 v = b - SA - 1;
282 lastrank = -1;
283 for(c = first, d = a - 1; c <= d; ++c) {
284 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
285 *++d = s;
286 rank = ISA[s + depth];
287 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
288 ISA[s] = newrank;
289 }
290 }
291
292 lastrank = -1;
293 for(e = d; first <= e; --e) {
294 rank = ISA[*e];
295 if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
296 if(newrank != rank) { ISA[*e] = newrank; }
297 }
298
299 lastrank = -1;
300 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
301 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
302 *--d = s;
303 rank = ISA[s + depth];
304 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
305 ISA[s] = newrank;
306 }
307 }
308 }
309
310 void
311 tr_introsort(saidx_it ISA, const_saidx_it ISAd,
312 saidx_it SA, saidx_it first, saidx_it last,
313 trbudget_t *budget) {
314 #define STACK_SIZE TR_STACKSIZE
315 struct { const_saidx_it a; saidx_it b, c; saint_t d, e; }stack[STACK_SIZE];
316 saidx_it a, b, c;
317 saidx_t t;
318 saidx_t v, x = 0;
319 saidx_t incr = ISAd - ISA;
320 saint_t limit, next;
321 saint_t ssize, trlink = -1;
322
323 for(ssize = 0, limit = tr_ilg(last - first);;) {
324
325 if(limit < 0) {
326 if(limit == -1) {
327 /* tandem repeat partition */
328 tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
329
330 /* update ranks */
331 if(a < last) {
332 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
333 }
334 if(b < last) {
335 for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
336 }
337
338 /* push */
339 if(1 < (b - a)) {
340 STACK_PUSH5(NULL, a, b, 0, 0);
341 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
342 trlink = ssize - 2;
343 }
344 if((a - first) <= (last - b)) {
345 if(1 < (a - first)) {
346 STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
347 last = a, limit = tr_ilg(a - first);
348 } else if(1 < (last - b)) {
349 first = b, limit = tr_ilg(last - b);
350 } else {
351 STACK_POP5(ISAd, first, last, limit, trlink);
352 }
353 } else {
354 if(1 < (last - b)) {
355 STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
356 first = b, limit = tr_ilg(last - b);
357 } else if(1 < (a - first)) {
358 last = a, limit = tr_ilg(a - first);
359 } else {
360 STACK_POP5(ISAd, first, last, limit, trlink);
361 }
362 }
363 } else if(limit == -2) {
364 /* tandem repeat copy */
365 a = stack[--ssize].b, b = stack[ssize].c;
366 if(stack[ssize].d == 0) {
367 tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
368 } else {
369 if(0 <= trlink) { stack[trlink].d = -1; }
370 tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
371 }
372 STACK_POP5(ISAd, first, last, limit, trlink);
373 } else {
374 /* sorted partition */
375 if(0 <= *first) {
376 a = first;
377 do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
378 first = a;
379 }
380 if(first < last) {
381 a = first; do { *a = ~*a; } while(*++a < 0);
382 next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
383 if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
384
385 /* push */
386 if(trbudget_check(budget, a - first)) {
387 if((a - first) <= (last - a)) {
388 STACK_PUSH5(ISAd, a, last, -3, trlink);
389 ISAd += incr, last = a, limit = next;
390 } else {
391 if(1 < (last - a)) {
392 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
393 first = a, limit = -3;
394 } else {
395 ISAd += incr, last = a, limit = next;
396 }
397 }
398 } else {
399 if(0 <= trlink) { stack[trlink].d = -1; }
400 if(1 < (last - a)) {
401 first = a, limit = -3;
402 } else {
403 STACK_POP5(ISAd, first, last, limit, trlink);
404 }
405 }
406 } else {
407 STACK_POP5(ISAd, first, last, limit, trlink);
408 }
409 }
410 continue;
411 }
412
413 if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
414 tr_insertionsort(ISAd, first, last);
415 limit = -3;
416 continue;
417 }
418
419 if(limit-- == 0) {
420 tr_heapsort(ISAd, first, last - first);
421 for(a = last - 1; first < a; a = b) {
422 for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
423 }
424 limit = -3;
425 continue;
426 }
427
428 /* choose pivot */
429 a = tr_pivot(ISAd, first, last);
430 SWAP(*first, *a);
431 v = ISAd[*first];
432
433 /* partition */
434 tr_partition(ISAd, first, first + 1, last, &a, &b, v);
435 if((last - first) != (b - a)) {
436 next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
437
438 /* update ranks */
439 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
440 if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
441
442 /* push */
443 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
444 if((a - first) <= (last - b)) {
445 if((last - b) <= (b - a)) {
446 if(1 < (a - first)) {
447 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
448 STACK_PUSH5(ISAd, b, last, limit, trlink);
449 last = a;
450 } else if(1 < (last - b)) {
451 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
452 first = b;
453 } else {
454 ISAd += incr, first = a, last = b, limit = next;
455 }
456 } else if((a - first) <= (b - a)) {
457 if(1 < (a - first)) {
458 STACK_PUSH5(ISAd, b, last, limit, trlink);
459 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
460 last = a;
461 } else {
462 STACK_PUSH5(ISAd, b, last, limit, trlink);
463 ISAd += incr, first = a, last = b, limit = next;
464 }
465 } else {
466 STACK_PUSH5(ISAd, b, last, limit, trlink);
467 STACK_PUSH5(ISAd, first, a, limit, trlink);
468 ISAd += incr, first = a, last = b, limit = next;
469 }
470 } else {
471 if((a - first) <= (b - a)) {
472 if(1 < (last - b)) {
473 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
474 STACK_PUSH5(ISAd, first, a, limit, trlink);
475 first = b;
476 } else if(1 < (a - first)) {
477 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
478 last = a;
479 } else {
480 ISAd += incr, first = a, last = b, limit = next;
481 }
482 } else if((last - b) <= (b - a)) {
483 if(1 < (last - b)) {
484 STACK_PUSH5(ISAd, first, a, limit, trlink);
485 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
486 first = b;
487 } else {
488 STACK_PUSH5(ISAd, first, a, limit, trlink);
489 ISAd += incr, first = a, last = b, limit = next;
490 }
491 } else {
492 STACK_PUSH5(ISAd, first, a, limit, trlink);
493 STACK_PUSH5(ISAd, b, last, limit, trlink);
494 ISAd += incr, first = a, last = b, limit = next;
495 }
496 }
497 } else {
498 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
499 if((a - first) <= (last - b)) {
500 if(1 < (a - first)) {
501 STACK_PUSH5(ISAd, b, last, limit, trlink);
502 last = a;
503 } else if(1 < (last - b)) {
504 first = b;
505 } else {
506 STACK_POP5(ISAd, first, last, limit, trlink);
507 }
508 } else {
509 if(1 < (last - b)) {
510 STACK_PUSH5(ISAd, first, a, limit, trlink);
511 first = b;
512 } else if(1 < (a - first)) {
513 last = a;
514 } else {
515 STACK_POP5(ISAd, first, last, limit, trlink);
516 }
517 }
518 }
519 } else {
520 if(trbudget_check(budget, last - first)) {
521 limit = tr_ilg(last - first), ISAd += incr;
522 } else {
523 if(0 <= trlink) { stack[trlink].d = -1; }
524 STACK_POP5(ISAd, first, last, limit, trlink);
525 }
526 }
527 }
528 #undef STACK_SIZE
529 }
530
531 } // namespace
532
533 /*---------------------------------------------------------------------------*/
534
535 /*- Function -*/
536
537 /* Tandem repeat sort */
538 void
539 trsort(saidx_it ISA, saidx_it SA, saidx_t n, saidx_t depth) {
540 saidx_it ISAd;
541 saidx_it first, last;
542 trbudget_t budget;
543 saidx_t t, skip, unsorted;
544
545 trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
546 /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
547 for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
548 first = SA;
549 skip = 0;
550 unsorted = 0;
551 do {
552 if((t = *first) < 0) { first -= t; skip += t; }
553 else {
554 if(skip != 0) { *(first + skip) = skip; skip = 0; }
555 last = SA + ISA[t] + 1;
556 if(1 < (last - first)) {
557 budget.count = 0;
558 tr_introsort(ISA, ISAd, SA, first, last, &budget);
559 if(budget.count != 0) { unsorted += budget.count; }
560 else { skip = first - last; }
561 } else if((last - first) == 1) {
562 skip = -1;
563 }
564 first = last;
565 }
566 } while(first < (SA + n));
567 if(skip != 0) { *(first + skip) = skip; }
568 if(unsorted == 0) { break; }
569 }
570 }
571
572 } // namespace divsuf
OLDNEW
« no previous file with comments | « courgette/third_party/divsufsort/sssort.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698