Index: courgette/third_party/divsufsort/divsufsort.cc |
diff --git a/courgette/third_party/divsufsort/divsufsort.cc b/courgette/third_party/divsufsort/divsufsort.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..461b6b8ced99eb76245e5a9942056e5ea1b79b2f |
--- /dev/null |
+++ b/courgette/third_party/divsufsort/divsufsort.cc |
@@ -0,0 +1,251 @@ |
+// Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. |
+// |
+// For the terms under which this work may be distributed, please see |
+// the adjoining file "LICENSE". |
+// |
+// ChangeLog: |
+// 2016-07-22 - Initial commit and adaption to use PagedArray. |
+// --Samuel Huang <huangs@chromium.org> |
+ |
+#include "courgette/third_party/divsufsort/divsufsort_private.h" |
+ |
+#include <stdlib.h> |
+ |
+#define BUCKET_A_SIZE (ALPHABET_SIZE) /* entries in bucket_A: one per character */ |
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) /* entries in bucket_B: one per character pair */ |
+ |
+#define BUCKET_A(_c0) bucket_A[(_c0)] /* expands to a use of the identifier bucket_A at the call site */ |
+#if ALPHABET_SIZE == 256 |
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) /* slot _c1 * 256 + _c0 */ |
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) /* same array, transposed: slot _c0 * 256 + _c1 */ |
+#else |
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) /* general form of the slot above */ |
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) /* same array, transposed indices */ |
+#endif |
+ |
+namespace divsuf { |
+ |
+/*- Private Functions -*/ |
+ |
+namespace { |
+ |
+/* Sorts suffixes of type B* of T[0..n) inside SA, fills bucket_A and bucket_B, and returns how many type B* suffixes exist. */ |
+saidx_t |
+sort_typeBstar(const sauchar_t *T, saidx_it SA, |
+ saidx_t *bucket_A, saidx_t *bucket_B, /* BUCKET_A_SIZE and BUCKET_B_SIZE entries; both are zeroed below */ |
+ saidx_t n) { /* T[n - 1] is read unconditionally, so n must be at least 1 */ |
+ saidx_it PAb, ISAb, buf; /* all three point into SA; set once m is known */ |
+ saidx_t i, j, k, t, m, bufsize; /* m ends up as the type B* count that is returned */ |
+ saint_t c0, c1; |
+ |
+ /* Initialize bucket arrays. */ |
+ for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } |
+ for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } |
+ |
+ /* Count the number of occurrences of the first one or two characters of each |
+ type A, B and B* suffix. Moreover, store the beginning position of all |
+ type B* suffixes into the array SA. */ |
+ for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { /* T is scanned from its last character to its first */ |
+ /* type A suffix. */ |
+ do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); |
+ if(0 <= i) { |
+ /* type B* suffix. */ |
+ ++BUCKET_BSTAR(c0, c1); |
+ SA[--m] = i; /* positions are packed downward from the end of SA */ |
+ /* type B suffix. */ |
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { |
+ ++BUCKET_B(c0, c1); |
+ } |
+ } |
+ } |
+ m = n - m; /* m was the lowest slot written above; it is now the number of type B* suffixes */ |
+/* |
+note: |
+ A type B* suffix is lexicographically smaller than a type B suffix that |
+ begins with the same first two characters. |
+*/ |
+ |
+ /* Calculate the index of start/end point of each bucket. */ |
+ for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { /* i accumulates type A and B counts, j accumulates type B* counts */ |
+ t = i + BUCKET_A(c0); |
+ BUCKET_A(c0) = i + j; /* start point */ |
+ i = t + BUCKET_B(c0, c0); |
+ for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { |
+ j += BUCKET_BSTAR(c0, c1); |
+ BUCKET_BSTAR(c0, c1) = j; /* end point */ |
+ i += BUCKET_B(c0, c1); |
+ } |
+ } |
+ |
+ if(0 < m) { /* everything below is skipped when there is no type B* suffix */ |
+ /* Sort the type B* suffixes by their first two characters. */ |
+ PAb = SA + n - m; ISAb = SA + m; /* PAb: the m positions stored above; ISAb: starts right after SA[0..m) */ |
+ for(i = m - 2; 0 <= i; --i) { |
+ t = PAb[i], c0 = T[t], c1 = T[t + 1]; |
+ SA[--BUCKET_BSTAR(c0, c1)] = i; /* stores the index into PAb, not the text position */ |
+ } |
+ t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; |
+ SA[--BUCKET_BSTAR(c0, c1)] = m - 1; /* inserted last, so index m - 1 sits at the front of its bucket */ |
+ |
+ /* Sort the type B* substrings using sssort. */ |
+ buf = SA + m, bufsize = n - (2 * m); /* handed to sssort as its buf and bufsize arguments */ |
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { |
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { |
+ i = BUCKET_BSTAR(c0, c1); |
+ if(1 < (j - i)) { /* buckets with fewer than two entries are left alone */ |
+ sssort(T, PAb, SA + i, SA + j, |
+ buf, bufsize, 2, n, *(SA + i) == (m - 1)); /* last argument: whether the bucket starts with index m - 1 */ |
+ } |
+ } |
+ } |
+ |
+ /* Compute ranks of type B* substrings. */ |
+ for(i = m - 1; 0 <= i; --i) { /* NOTE(review): negative entries look like markers left by sssort; confirm there */ |
+ if(0 <= SA[i]) { /* run of non-negative entries: each one gets its own index as rank */ |
+ j = i; |
+ do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); |
+ SA[i + 1] = i - j; /* i - j is negative: minus the length of the run just ranked */ |
+ if(i <= 0) { break; } |
+ } |
+ j = i; |
+ do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); /* run of complemented entries: restore each, all share rank j */ |
+ ISAb[SA[i]] = j; /* the first non-negative entry to the left of the run gets the same rank */ |
+ } |
+ |
+ /* Construct the inverse suffix array of type B* suffixes using trsort. */ |
+ trsort(ISAb, SA, m, 1); |
+ |
+ /* Set the sorted order of type B* suffixes. */ |
+ for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { /* repeats the right-to-left scan of the counting pass */ |
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } /* same condition as the type A run above */ |
+ if(0 <= i) { |
+ t = i; /* i is at a type B* position here, as in the counting pass */ |
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } /* same condition as the type B run above */ |
+ SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; /* position t goes to its rank, complemented unless t == 0 or t - i > 1 */ |
+ } |
+ } |
+ |
+ /* Calculate the index of start/end point of each bucket. */ |
+ BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ |
+ for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { /* k walks the sorted type B* entries in SA[0..m) from the right */ |
+ i = BUCKET_A(c0 + 1) - 1; |
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { |
+ t = i - BUCKET_B(c0, c1); |
+ BUCKET_B(c0, c1) = i; /* end point */ |
+ |
+ /* Move all type B* suffixes to the correct position. */ |
+ for(i = t, j = BUCKET_BSTAR(c0, c1); |
+ j <= k; |
+ --i, --k) { SA[i] = SA[k]; } |
+ } |
+ BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ |
+ BUCKET_B(c0, c0) = i; /* end point */ |
+ } |
+ } |
+ |
+ return m; /* number of type B* suffixes; 0 when none were found */ |
+} |
+ |
+/* Constructs the suffix array of T[0..n) in SA by using the sorted order of type B* suffixes; m is the value returned by sort_typeBstar. */ |
+void |
+construct_SA(const sauchar_t *T, saidx_it SA, |
+ saidx_t *bucket_A, saidx_t *bucket_B, /* bucket boundaries as left by sort_typeBstar; updated while scanning */ |
+ saidx_t n, saidx_t m) { /* T[n - 2] is read below, so n must be at least 2 */ |
+ saidx_it i, j, k; /* i, j: scan bounds and cursor; k: write cursor of the bucket being filled */ |
+ saidx_t s; |
+ saint_t c0, c1, c2; /* c2: first character of the bucket k points into */ |
+ |
+ if(0 < m) { |
+ /* Construct the sorted order of type B suffixes by using |
+ the sorted order of type B* suffixes. */ |
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { |
+ /* Scan the suffix array from right to left. */ |
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1), |
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; /* c2 == -1: no bucket has been opened in this scan yet */ |
+ i <= j; |
+ --j) { |
+ if(0 < (s = *j)) { /* positive entry: its predecessor s - 1 is placed now */ |
+ assert(T[s] == c1); |
+ assert(((s + 1) < n) && (T[s] <= T[s + 1])); |
+ assert(T[s - 1] <= T[s]); |
+ *j = ~s; /* complemented here; the left-to-right pass below flips it back */ |
+ c0 = T[--s]; |
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; } /* complement s when the character before it is larger */ |
+ if(c0 != c2) { |
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } /* save the cursor of the bucket being left */ |
+ k = SA + BUCKET_B(c2 = c0, c1); |
+ } |
+ assert(k < j); |
+ *k-- = s; /* each bucket is filled from its end toward its start */ |
+ } else { |
+ assert(((s == 0) && (T[s] == c1)) || (s < 0)); |
+ *j = ~s; /* zero or complemented entry: only flip it */ |
+ } |
+ } |
+ } |
+ } |
+ |
+ /* Construct the suffix array by using |
+ the sorted order of type B suffixes. */ |
+ k = SA + BUCKET_A(c2 = T[n - 1]); /* start of the bucket for the last character of T */ |
+ *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); /* suffix n - 1 opens its bucket; complemented if T[n - 2] is smaller */ |
+ /* Scan the suffix array from left to right. */ |
+ for(i = SA, j = SA + n; i < j; ++i) { |
+ if(0 < (s = *i)) { /* positive entry: its predecessor s - 1 is placed now */ |
+ assert(T[s - 1] >= T[s]); |
+ c0 = T[--s]; |
+ if((s == 0) || (T[s - 1] < c0)) { s = ~s; } /* complemented entries take the else branch when the scan reaches them */ |
+ if(c0 != c2) { |
+ BUCKET_A(c2) = k - SA; /* save the cursor of the bucket being left */ |
+ k = SA + BUCKET_A(c2 = c0); |
+ } |
+ assert(i < k); |
+ *k++ = s; /* each bucket is filled from its start toward its end */ |
+ } else { |
+ assert(s < 0); |
+ *i = ~s; /* undo the complement, leaving a non-negative suffix index */ |
+ } |
+ } |
+} |
+ |
+} // namespace |
+ |
+/*---------------------------------------------------------------------------*/ |
+ |
+/*- Function -*/ |
+ |
+saint_t |
+divsufsort(const sauchar_t *T, saidx_it SA, saidx_t n) { |
+  /* Reject invalid arguments (-1), then answer inputs of length 0..2 directly. */ |
+  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } |
+  if(n == 0) { return 0; } |
+  if(n == 1) { SA[0] = 0; return 0; } |
+  if(n == 2) { |
+    const saidx_t first_is_smaller = (T[0] < T[1]); /* 1 when suffix 0 sorts before suffix 1 */ |
+    SA[first_is_smaller ^ 1] = 0; |
+    SA[first_is_smaller] = 1; |
+    return 0; |
+  } |
+ |
+  /* Bucket tables used by both phases; released again before returning. */ |
+  saidx_t *bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); |
+  saidx_t *bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); |
+  saint_t status = -2; /* stays -2 when either allocation failed */ |
+ |
+  if((bucket_A != NULL) && (bucket_B != NULL)) { |
+    /* Suffixsort: order the type B* suffixes, then derive all the others. */ |
+    const saidx_t bstar_count = sort_typeBstar(T, SA, bucket_A, bucket_B, n); |
+    construct_SA(T, SA, bucket_A, bucket_B, n, bstar_count); |
+    status = 0; |
+  } |
+ |
+  free(bucket_B); /* free(NULL) is a no-op, so the failure path needs no special case */ |
+  free(bucket_A); |
+  return status; } |
+ |
+saint_t divsufsort_include_empty(const sauchar_t *T, saidx_it SA, saidx_t n) { /* divsufsort() plus a leading slot for the empty suffix */ |
+  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } /* checked here because SA is written before divsufsort() validates */ |
+  SA[0] = n; /* Manually add the empty string suffix. */ |
+  return divsufsort(T, SA + 1, n); /* the suffixes of T are sorted into the slots starting at SA + 1 */ |
+} |
+} // namespace divsuf |