Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: courgette/third_party/qsufsort.h

Issue 1914303002: [Courgette] Fix sorting in QSufSort's split(). (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Comment nit. Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 qsufsort.h -- Suffix array generation. 2 qsufsort.h -- Suffix array generation.
3 3
4 Copyright 2003 Colin Percival 4 Copyright 2003 Colin Percival
5 5
6 For the terms under which this work may be distributed, please see 6 For the terms under which this work may be distributed, please see
7 the adjoining file "LICENSE". 7 the adjoining file "LICENSE".
8 8
9 ChangeLog: 9 ChangeLog:
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
11 values throughout. 11 values throughout.
12 --Benjamin Smedberg <benjamin@smedbergs.us> 12 --Benjamin Smedberg <benjamin@smedbergs.us>
13 2010-05-26 - Use a paged array for V and I. The address space may be too 13 2010-05-26 - Use a paged array for V and I. The address space may be too
14 fragmented for these big arrays to be contiguous. 14 fragmented for these big arrays to be contiguous.
15 --Stephen Adams <sra@chromium.org> 15 --Stephen Adams <sra@chromium.org>
16 2015-08-03 - Extrat qsufsort to a separate file as template. 16 2015-08-03 - Extract QSufSort to a separate file as template.
17 --Samuel Huang <huangs@chromium.org> 17 --Samuel Huang <huangs@chromium.org>
18 2015-08-19 - Optimize split() and search(), add comments. 18 2015-08-19 - Optimize split() and search(), add comments.
19 --Samuel Huang <huangs@chromium.org> 19 --Samuel Huang <huangs@chromium.org>
20 2016-04-27 - Change split() to use Bentley & McIlroy's pivot selection
21 algorithm, which QSufSort originally used. Reference:
22 http://www.larsson.dogma.net/qsufsort.c
23 --Samuel Huang <huangs@chromium.org>
20 */ 24 */
21 25
22 #include <algorithm> 26 #include <algorithm>
23 #include <cstring> 27 #include <cstring>
24 28
25 namespace courgette { 29 namespace courgette {
26 namespace qsuf { 30 namespace qsuf {
27 31
28 // ------------------------------------------------------------------------ 32 // ------------------------------------------------------------------------
29 // 33 //
30 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the 34 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the
31 // code formatting and variable names. The changes from the original are: 35 // code formatting and variable names. The changes from the original are:
32 // (1) replacing tabs with spaces, 36 // (1) replacing tabs with spaces,
33 // (2) indentation, 37 // (2) indentation,
34 // (3) using 'const', 38 // (3) using 'const',
35 // (4) changing the V and I parameters from int* to template <typename T>. 39 // (4) changing the V and I parameters from int* to template <typename T>.
36 // (5) optimizing split() and search(); fix styles. 40 // (5) optimizing split() and search(); fix styles.
37 // 41 //
38 // The code appears to be a rewritten version of the suffix array algorithm 42 // The code appears to be a rewritten version of the suffix array algorithm
39 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko 43 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
40 // Sadakane, special cased for bytes. 44 // Sadakane, special cased for bytes.
41 45
42 template <typename T> static void split(T I, T V, int start, int end, int h) { 46 namespace {
47
48 template <typename T> T median3(const T& a, const T& b, const T& c) {
49 if (a < b)
50 return b < c ? b : (a < c ? c : a);
51 return b > c ? b : (a > c ? c : a);
52 }
53
54 } // namespace
55
56 template <typename T> void split(T I, T V, int start, int end, int h) {
43 // For small interval, apply selection sort. 57 // For small interval, apply selection sort.
44 if (end - start < 6) { 58 if (end - start < 16) {
45 for (int i = start; i < end; ) { 59 for (int i = start; i < end; ) {
46 int skip = 1; 60 int skip = 1;
47 int best = V[I[i] + h]; 61 int best = V[I[i] + h];
48 for (int j = i + 1; j < end; j++) { 62 for (int j = i + 1; j < end; j++) {
49 int cur = V[I[j] + h]; 63 int cur = V[I[j] + h];
50 if (best > cur) { 64 if (best > cur) {
51 best = cur; 65 best = cur;
52 int tmp = I[i]; 66 int tmp = I[i];
53 I[i] = I[j]; 67 I[i] = I[j];
54 I[j] = tmp; 68 I[j] = tmp;
(...skipping 10 matching lines...) Expand all
65 I[i] = -1; 79 I[i] = -1;
66 } else { 80 } else {
67 for (int j = i, jend = i + skip; j < jend; j++) 81 for (int j = i, jend = i + skip; j < jend; j++)
68 V[I[j]] = jend - 1; 82 V[I[j]] = jend - 1;
69 } 83 }
70 i += skip; 84 i += skip;
71 } 85 }
72 return; 86 return;
73 } 87 }
74 88
89 // Select pivot, algorithm by Bentley & McIlroy.
90 int n = end - start;
91 int mid = start + (n >> 1);
92 int pivot = V[I[mid] + h];
93 int p1 = V[I[start] + h];
94 int p2 = V[I[end - 1] + h];
95 if (n > 40) { // Big array: Pseudomedian of 9.
96 int s = n >> 3;
97 pivot = median3(pivot, V[I[mid - s] + h], V[I[mid + s] + h]);
98 p1 = median3(p1, V[I[start + s] + h], V[I[start + s + s] + h]);
99 p2 = median3(p2, V[I[end - 1 - s] + h], V[I[end - 1 - s - s] + h]);
100 } // Else medium array: Pseudomedian of 3.
101 pivot = median3(pivot, p1, p2);
102
75 // Split [start, end) into 3 intervals: 103 // Split [start, end) into 3 intervals:
76 // [start, j) with secondary keys < pivot, 104 // [start, j) with secondary keys < pivot,
77 // [j, k) with secondary keys == pivot, 105 // [j, k) with secondary keys == pivot,
78 // [k, end) with secondary keys > pivot. 106 // [k, end) with secondary keys > pivot.
79 int pivot = V[I[(start + end) / 2] + h];
80 int j = start; 107 int j = start;
81 int k = end; 108 int k = end;
82 for (int i = start; i < k; ) { 109 for (int i = start; i < k; ) {
83 int cur = V[I[i] + h]; 110 int cur = V[I[i] + h];
84 if (cur < pivot) { 111 if (cur < pivot) {
85 if (i != j) { 112 if (i != j) {
86 int tmp = I[i]; 113 int tmp = I[i];
87 I[i] = I[j]; 114 I[i] = I[j];
88 I[j] = tmp; 115 I[j] = tmp;
89 } 116 }
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
190 } 217 }
191 *pos = I[hi]; 218 *pos = I[hi];
192 return y; 219 return y;
193 } 220 }
194 221
195 // End of 'verbatim' code. 222 // End of 'verbatim' code.
196 // ------------------------------------------------------------------------ 223 // ------------------------------------------------------------------------
197 224
198 } // namespace qsuf 225 } // namespace qsuf
199 } // namespace courgette 226 } // namespace courgette
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698