Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(601)

Side by Side Diff: courgette/third_party/divsufsort/trsort.cc

Issue 2156223002: [Courgette] Add third party-library: libdivsufsort. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix typo in README.chromium: LICEN*S*E. Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « courgette/third_party/divsufsort/sssort.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person
4 // obtaining a copy of this software and associated documentation
5 // files (the "Software"), to deal in the Software without
6 // restriction, including without limitation the rights to use,
7 // copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following
10 // conditions:
11 //
12 // The above copyright notice and this permission notice shall be
13 // included in all copies or substantial portions of the Software.
14 //
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 // OTHER DEALINGS IN THE SOFTWARE.
23 //
24 // ChangeLog:
25 // 2016-07-22 - Initial commit and adaption to use PagedArray.
26 // --Samuel Huang <huangs@chromium.org>
27
28 #include "courgette/third_party/divsufsort/divsufsort_private.h"
29
30 #define TR_INSERTIONSORT_THRESHOLD (8)
31 #define TR_STACKSIZE (64)
32
33 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
34 do {\
35 assert(ssize < STACK_SIZE);\
36 stack[ssize].a = (_a), stack[ssize].b = (_b),\
37 stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
38 } while(0)
39 #define STACK_POP5(_a, _b, _c, _d, _e)\
40 do {\
41 assert(0 <= ssize);\
42 if(ssize == 0) { return; }\
43 (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
44 (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
45 } while(0)
46
47
48 namespace divsuf {
49
50 namespace {
51
52 /*- Private Functions -*/
53
54 const saint_t lg_table[256]= {
55 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
56 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
57 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
58 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
59 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
60 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
61 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
62 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
63 };
64
65 inline
66 saint_t
67 tr_ilg(saidx_t n) {
68 return (n & 0xffff0000) ?
69 ((n & 0xff000000) ?
70 24 + lg_table[(n >> 24) & 0xff] :
71 16 + lg_table[(n >> 16) & 0xff]) :
72 ((n & 0x0000ff00) ?
73 8 + lg_table[(n >> 8) & 0xff] :
74 0 + lg_table[(n >> 0) & 0xff]);
75 }
76
77
78 /*---------------------------------------------------------------------------*/
79
80 /* Simple insertionsort for small size groups. */
81 void
82 tr_insertionsort(const_saidx_it ISAd, saidx_it first, saidx_it last) {
83 saidx_it a, b;
84 saidx_t t, r;
85
86 for(a = first + 1; a < last; ++a) {
87 for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
88 do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
89 if(b < first) { break; }
90 }
91 if(r == 0) { *b = ~*b; }
92 *(b + 1) = t;
93 }
94 }
95
96
97 /*---------------------------------------------------------------------------*/
98
99 inline
100 void
101 tr_fixdown(const_saidx_it ISAd, saidx_it SA, saidx_t i, saidx_t size) {
102 saidx_t j, k;
103 saidx_t v;
104 saidx_t c, d, e;
105
106 for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
107 d = ISAd[SA[k = j++]];
108 if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
109 if(d <= c) { break; }
110 }
111 SA[i] = v;
112 }
113
114 /* Simple top-down heapsort. */
115 void
116 tr_heapsort(const_saidx_it ISAd, saidx_it SA, saidx_t size) {
117 saidx_t i, m;
118 saidx_t t;
119
120 m = size;
121 if((size % 2) == 0) {
122 m--;
123 if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
124 }
125
126 for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
127 if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
128 for(i = m - 1; 0 < i; --i) {
129 t = SA[0], SA[0] = SA[i];
130 tr_fixdown(ISAd, SA, 0, i);
131 SA[i] = t;
132 }
133 }
134
135
136 /*---------------------------------------------------------------------------*/
137
138 /* Returns the median of three elements. */
139 inline
140 saidx_it
141 tr_median3(const_saidx_it ISAd, saidx_it v1, saidx_it v2, saidx_it v3) {
142 saidx_it t;
143 if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
144 if(ISAd[*v2] > ISAd[*v3]) {
145 if(ISAd[*v1] > ISAd[*v3]) { return v1; }
146 else { return v3; }
147 }
148 return v2;
149 }
150
151 /* Returns the median of five elements. */
152 inline
153 saidx_it
154 tr_median5(const_saidx_it ISAd,
155 saidx_it v1, saidx_it v2, saidx_it v3, saidx_it v4, saidx_it v5) {
156 saidx_it t;
157 if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
158 if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
159 if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
160 if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
161 if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
162 if(ISAd[*v3] > ISAd[*v4]) { return v4; }
163 return v3;
164 }
165
166 /* Returns the pivot element. */
167 inline
168 saidx_it
169 tr_pivot(const_saidx_it ISAd, saidx_it first, saidx_it last) {
170 saidx_it middle;
171 saidx_t t;
172
173 t = last - first;
174 middle = first + t / 2;
175
176 if(t <= 512) {
177 if(t <= 32) {
178 return tr_median3(ISAd, first, middle, last - 1);
179 } else {
180 t >>= 2;
181 return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
182 }
183 }
184 t >>= 3;
185 first = tr_median3(ISAd, first, first + t, first + (t << 1));
186 middle = tr_median3(ISAd, middle - t, middle, middle + t);
187 last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
188 return tr_median3(ISAd, first, middle, last);
189 }
190
191
192 /*---------------------------------------------------------------------------*/
193
194 typedef struct _trbudget_t trbudget_t;
195 struct _trbudget_t {
196 saidx_t chance;
197 saidx_t remain;
198 saidx_t incval;
199 saidx_t count;
200 };
201
202 inline
203 void
204 trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
205 budget->chance = chance;
206 budget->remain = budget->incval = incval;
207 }
208
209 inline
210 saint_t
211 trbudget_check(trbudget_t *budget, saidx_t size) {
212 if(size <= budget->remain) { budget->remain -= size; return 1; }
213 if(budget->chance == 0) { budget->count += size; return 0; }
214 budget->remain += budget->incval - size;
215 budget->chance -= 1;
216 return 1;
217 }
218
219
220 /*---------------------------------------------------------------------------*/
221
222 inline
223 void
224 tr_partition(const_saidx_it ISAd,
225 saidx_it first, saidx_it middle, saidx_it last,
226 saidx_it* pa, saidx_it* pb, saidx_t v) {
227 saidx_it a, b, c, d, e, f;
228 saidx_t t, s;
229 saidx_t x = 0;
230
231 for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
232 if(((a = b) < last) && (x < v)) {
233 for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
234 if(x == v) { SWAP(*b, *a); ++a; }
235 }
236 }
237 for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
238 if((b < (d = c)) && (x > v)) {
239 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
240 if(x == v) { SWAP(*c, *d); --d; }
241 }
242 }
243 for(; b < c;) {
244 SWAP(*b, *c);
245 for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
246 if(x == v) { SWAP(*b, *a); ++a; }
247 }
248 for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
249 if(x == v) { SWAP(*c, *d); --d; }
250 }
251 }
252
253 if(a <= d) {
254 c = b - 1;
255 if((s = a - first) > (t = b - a)) { s = t; }
256 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
257 if((s = d - c) > (t = last - d - 1)) { s = t; }
258 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
259 first += (b - a), last -= (d - c);
260 }
261 *pa = first, *pb = last;
262 }
263
264 void
265 tr_copy(saidx_it ISA, const_saidx_it SA,
266 saidx_it first, saidx_it a, saidx_it b, saidx_it last,
267 saidx_t depth) {
268 /* sort suffixes of middle partition
269 by using sorted order of suffixes of left and right partition. */
270 saidx_it c, d, e;
271 saidx_t s, v;
272
273 v = b - SA - 1;
274 for(c = first, d = a - 1; c <= d; ++c) {
275 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
276 *++d = s;
277 ISA[s] = d - SA;
278 }
279 }
280 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
281 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
282 *--d = s;
283 ISA[s] = d - SA;
284 }
285 }
286 }
287
288 void
289 tr_partialcopy(saidx_it ISA, const_saidx_it SA,
290 saidx_it first, saidx_it a, saidx_it b, saidx_it last,
291 saidx_t depth) {
292 saidx_it c, d, e;
293 saidx_t s, v;
294 saidx_t rank, lastrank, newrank = -1;
295
296 v = b - SA - 1;
297 lastrank = -1;
298 for(c = first, d = a - 1; c <= d; ++c) {
299 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
300 *++d = s;
301 rank = ISA[s + depth];
302 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
303 ISA[s] = newrank;
304 }
305 }
306
307 lastrank = -1;
308 for(e = d; first <= e; --e) {
309 rank = ISA[*e];
310 if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
311 if(newrank != rank) { ISA[*e] = newrank; }
312 }
313
314 lastrank = -1;
315 for(c = last - 1, e = d + 1, d = b; e < d; --c) {
316 if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
317 *--d = s;
318 rank = ISA[s + depth];
319 if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
320 ISA[s] = newrank;
321 }
322 }
323 }
324
325 void
326 tr_introsort(saidx_it ISA, const_saidx_it ISAd,
327 saidx_it SA, saidx_it first, saidx_it last,
328 trbudget_t *budget) {
329 #define STACK_SIZE TR_STACKSIZE
330 struct { const_saidx_it a; saidx_it b, c; saint_t d, e; }stack[STACK_SIZE];
331 saidx_it a, b, c;
332 saidx_t t;
333 saidx_t v, x = 0;
334 saidx_t incr = ISAd - ISA;
335 saint_t limit, next;
336 saint_t ssize, trlink = -1;
337
338 for(ssize = 0, limit = tr_ilg(last - first);;) {
339
340 if(limit < 0) {
341 if(limit == -1) {
342 /* tandem repeat partition */
343 tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
344
345 /* update ranks */
346 if(a < last) {
347 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
348 }
349 if(b < last) {
350 for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
351 }
352
353 /* push */
354 if(1 < (b - a)) {
355 STACK_PUSH5(NULL, a, b, 0, 0);
356 STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
357 trlink = ssize - 2;
358 }
359 if((a - first) <= (last - b)) {
360 if(1 < (a - first)) {
361 STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
362 last = a, limit = tr_ilg(a - first);
363 } else if(1 < (last - b)) {
364 first = b, limit = tr_ilg(last - b);
365 } else {
366 STACK_POP5(ISAd, first, last, limit, trlink);
367 }
368 } else {
369 if(1 < (last - b)) {
370 STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
371 first = b, limit = tr_ilg(last - b);
372 } else if(1 < (a - first)) {
373 last = a, limit = tr_ilg(a - first);
374 } else {
375 STACK_POP5(ISAd, first, last, limit, trlink);
376 }
377 }
378 } else if(limit == -2) {
379 /* tandem repeat copy */
380 a = stack[--ssize].b, b = stack[ssize].c;
381 if(stack[ssize].d == 0) {
382 tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
383 } else {
384 if(0 <= trlink) { stack[trlink].d = -1; }
385 tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
386 }
387 STACK_POP5(ISAd, first, last, limit, trlink);
388 } else {
389 /* sorted partition */
390 if(0 <= *first) {
391 a = first;
392 do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
393 first = a;
394 }
395 if(first < last) {
396 a = first; do { *a = ~*a; } while(*++a < 0);
397 next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
398 if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
399
400 /* push */
401 if(trbudget_check(budget, a - first)) {
402 if((a - first) <= (last - a)) {
403 STACK_PUSH5(ISAd, a, last, -3, trlink);
404 ISAd += incr, last = a, limit = next;
405 } else {
406 if(1 < (last - a)) {
407 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
408 first = a, limit = -3;
409 } else {
410 ISAd += incr, last = a, limit = next;
411 }
412 }
413 } else {
414 if(0 <= trlink) { stack[trlink].d = -1; }
415 if(1 < (last - a)) {
416 first = a, limit = -3;
417 } else {
418 STACK_POP5(ISAd, first, last, limit, trlink);
419 }
420 }
421 } else {
422 STACK_POP5(ISAd, first, last, limit, trlink);
423 }
424 }
425 continue;
426 }
427
428 if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
429 tr_insertionsort(ISAd, first, last);
430 limit = -3;
431 continue;
432 }
433
434 if(limit-- == 0) {
435 tr_heapsort(ISAd, first, last - first);
436 for(a = last - 1; first < a; a = b) {
437 for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
438 }
439 limit = -3;
440 continue;
441 }
442
443 /* choose pivot */
444 a = tr_pivot(ISAd, first, last);
445 SWAP(*first, *a);
446 v = ISAd[*first];
447
448 /* partition */
449 tr_partition(ISAd, first, first + 1, last, &a, &b, v);
450 if((last - first) != (b - a)) {
451 next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
452
453 /* update ranks */
454 for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
455 if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
456
457 /* push */
458 if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
459 if((a - first) <= (last - b)) {
460 if((last - b) <= (b - a)) {
461 if(1 < (a - first)) {
462 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
463 STACK_PUSH5(ISAd, b, last, limit, trlink);
464 last = a;
465 } else if(1 < (last - b)) {
466 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
467 first = b;
468 } else {
469 ISAd += incr, first = a, last = b, limit = next;
470 }
471 } else if((a - first) <= (b - a)) {
472 if(1 < (a - first)) {
473 STACK_PUSH5(ISAd, b, last, limit, trlink);
474 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
475 last = a;
476 } else {
477 STACK_PUSH5(ISAd, b, last, limit, trlink);
478 ISAd += incr, first = a, last = b, limit = next;
479 }
480 } else {
481 STACK_PUSH5(ISAd, b, last, limit, trlink);
482 STACK_PUSH5(ISAd, first, a, limit, trlink);
483 ISAd += incr, first = a, last = b, limit = next;
484 }
485 } else {
486 if((a - first) <= (b - a)) {
487 if(1 < (last - b)) {
488 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
489 STACK_PUSH5(ISAd, first, a, limit, trlink);
490 first = b;
491 } else if(1 < (a - first)) {
492 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
493 last = a;
494 } else {
495 ISAd += incr, first = a, last = b, limit = next;
496 }
497 } else if((last - b) <= (b - a)) {
498 if(1 < (last - b)) {
499 STACK_PUSH5(ISAd, first, a, limit, trlink);
500 STACK_PUSH5(ISAd + incr, a, b, next, trlink);
501 first = b;
502 } else {
503 STACK_PUSH5(ISAd, first, a, limit, trlink);
504 ISAd += incr, first = a, last = b, limit = next;
505 }
506 } else {
507 STACK_PUSH5(ISAd, first, a, limit, trlink);
508 STACK_PUSH5(ISAd, b, last, limit, trlink);
509 ISAd += incr, first = a, last = b, limit = next;
510 }
511 }
512 } else {
513 if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
514 if((a - first) <= (last - b)) {
515 if(1 < (a - first)) {
516 STACK_PUSH5(ISAd, b, last, limit, trlink);
517 last = a;
518 } else if(1 < (last - b)) {
519 first = b;
520 } else {
521 STACK_POP5(ISAd, first, last, limit, trlink);
522 }
523 } else {
524 if(1 < (last - b)) {
525 STACK_PUSH5(ISAd, first, a, limit, trlink);
526 first = b;
527 } else if(1 < (a - first)) {
528 last = a;
529 } else {
530 STACK_POP5(ISAd, first, last, limit, trlink);
531 }
532 }
533 }
534 } else {
535 if(trbudget_check(budget, last - first)) {
536 limit = tr_ilg(last - first), ISAd += incr;
537 } else {
538 if(0 <= trlink) { stack[trlink].d = -1; }
539 STACK_POP5(ISAd, first, last, limit, trlink);
540 }
541 }
542 }
543 #undef STACK_SIZE
544 }
545
546 } // namespace
547
548 /*---------------------------------------------------------------------------*/
549
550 /*- Function -*/
551
552 /* Tandem repeat sort */
553 void
554 trsort(saidx_it ISA, saidx_it SA, saidx_t n, saidx_t depth) {
555 saidx_it ISAd;
556 saidx_it first, last;
557 trbudget_t budget;
558 saidx_t t, skip, unsorted;
559
560 trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
561 /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
562 for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
563 first = SA;
564 skip = 0;
565 unsorted = 0;
566 do {
567 if((t = *first) < 0) { first -= t; skip += t; }
568 else {
569 if(skip != 0) { *(first + skip) = skip; skip = 0; }
570 last = SA + ISA[t] + 1;
571 if(1 < (last - first)) {
572 budget.count = 0;
573 tr_introsort(ISA, ISAd, SA, first, last, &budget);
574 if(budget.count != 0) { unsorted += budget.count; }
575 else { skip = first - last; }
576 } else if((last - first) == 1) {
577 skip = -1;
578 }
579 first = last;
580 }
581 } while(first < (SA + n));
582 if(skip != 0) { *(first + skip) = skip; }
583 if(unsorted == 0) { break; }
584 }
585 }
586
587 } // namespace divsuf
OLDNEW
« no previous file with comments | « courgette/third_party/divsufsort/sssort.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698