Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(34)

Side by Side Diff: courgette/third_party/divsufsort/divsufsort_unittest.cc

Issue 1948843002: [Courgette Experimental] Replace QSufSort with libdivsufsort Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Sync and merge. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "courgette/third_party/divsufsort/divsufsort.h"
6
7 #include <stddef.h>
8
9 #include <algorithm>
10 #include <cstring>
11 #include <memory>
12 #include <random>
13 #include <string>
14 #include <vector>
15
16 #include "base/macros.h"
17 #include "base/time/time.h"
18 #include "courgette/third_party/bsdiff/bsdiff_search.h"
19 #include "courgette/third_party/bsdiff/paged_array.h"
20 #include "testing/gtest/include/gtest/gtest.h"
21
22 namespace courgette {
23
24 TEST(DivSufSortTest, Sort) {
25 const char* test_cases[] = {
26 "",
27 "a",
28 "za",
29 "CACAO",
30 "banana",
31 "tobeornottobe",
32 "The quick brown fox jumps over the lazy dog.",
33 "elephantelephantelephantelephantelephant",
34 "-------------------------",
35 "011010011001011010010110011010010",
36 "3141592653589793238462643383279502884197169399375105",
37 "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
38 };
39
40 for (size_t idx = 0; idx < arraysize(test_cases); ++idx) {
41 int len = static_cast<int>(::strlen(test_cases[idx]));
42 const unsigned char* s =
43 reinterpret_cast<const unsigned char*>(test_cases[idx]);
44
45 // Generate the suffix array as I.
46 PagedArray<divsuf::saidx_t> I;
47 ASSERT_TRUE(I.Allocate(len + 1));
48 I[0] = len;
49 divsuf::divsufsort(s, I.begin() + 1, len);
50
51 // Expect that I[] is a permutation of [0, len].
52 std::vector<divsuf::saidx_t> I_sorted(I.begin(), I.end());
53 std::sort(I_sorted.begin(), I_sorted.end());
54
55 for (divsuf::saidx_t i = 0; i < len; ++i)
56 EXPECT_EQ(i, I_sorted[i]) << "test_case[" << idx << "]";
57
58 // Expect that the |len| non-empty suffixes are strictly ordered.
59 const unsigned char* end = s + len;
60 for (divsuf::saidx_t i = 1; i < len; ++i) {
61 const unsigned char* suf1 = s + I[i - 1];
62 const unsigned char* suf2 = s + I[i];
63 bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
64 EXPECT_TRUE(is_less) << "test_case[" << idx << "]";
65 }
66 }
67 }
68
69 TEST(DivSufSortTest, Search) {
70 // Initialize main string and the suffix array.
71 // Positions: 00000000001111111111122222222233333333334444
72 // 01234567890123456789012345678901234567890123
73 const char* old_str = "the quick brown fox jumps over the lazy dog.";
74 int old_size = static_cast<int>(::strlen(old_str));
75 const unsigned char* old_buf =
76 reinterpret_cast<const unsigned char*>(old_str);
77 PagedArray<divsuf::saidx_t> I;
78 ASSERT_TRUE(I.Allocate(old_size + 1));
79 I[0] = old_size;
80 divsuf::divsufsort(old_buf, I.begin() + 1, old_size);
81
82 // Test queries.
83 const struct {
84 int exp_pos; // -1 means "don't care".
85 int exp_match_len;
86 const char* query_str;
87 } test_cases[] = {
88 // Entire string.
89 {0, 44, "the quick brown fox jumps over the lazy dog."},
90 // Empty string.
91 {-1, 0, ""}, // Current algorithm does not enforce |pos| == 0.
92 // Exact and unique suffix match.
93 {43, 1, "."},
94 {31, 13, "the lazy dog."},
95 // Exact and unique non-suffix match.
96 {4, 5, "quick"},
97 {0, 9, "the quick"}, // Unique prefix.
98 // Entire word match with mutiple results: take lexicographical first.
99 {31, 3, "the"}, // Non-unique prefix: "the l"... < "the q"...
100 {9, 1, " "}, // " brown"... wins.
101 // Partial and unique match of query prefix.
102 {16, 10, "fox jumps with the hosps"},
103 {16, 10, "fox jumps "},
104 {16, 10, "fox jumps w"},
105 // Partial and multiple match of query prefix: no guarantees on |pos|.
106 // Take lexicographical first for matching portion *only*, so same results:
107 {-1, 4, "the apple"}, // query < "the l"... < "the q"...
108 {-1, 4, "the opera"}, // "the l"... < query < "the q"...
109 {-1, 4, "the zebra"}, // "the l"... < "the q"... < query
110 // Prefix match dominates suffix match.
111 {26, 5, "over quick brown fox"},
112 // No match.
113 {-1, 0, ","},
114 {-1, 0, "1234"},
115 {-1, 0, "THE QUICK BROWN FOX"},
116 {-1, 0, "(the"},
117 };
118
119 for (size_t idx = 0; idx < arraysize(test_cases); ++idx) {
120 const auto& test_case = test_cases[idx];
121 int new_size = static_cast<int>(::strlen(test_case.query_str));
122 const unsigned char* new_buf =
123 reinterpret_cast<const unsigned char*>(test_case.query_str);
124
125 // Perform the search.
126 int pos = 0;
127 int match_len = bsdiff::search<PagedArray<divsuf::saidx_t>&>(
128 I, old_buf, old_size, new_buf, new_size, &pos);
129
130 // Check basic properties and match with expected values.
131 EXPECT_GE(match_len, 0) << "test_case[" << idx << "]";
132 EXPECT_LE(match_len, new_size) << "test_case[" << idx << "]";
133 if (match_len > 0) {
134 EXPECT_GE(pos, 0) << "test_case[" << idx << "]";
135 EXPECT_LE(pos, old_size - match_len) << "test_case[" << idx << "]";
136 EXPECT_EQ(0, ::memcmp(old_buf + pos, new_buf, match_len))
137 << "test_case[" << idx << "]";
138 }
139 if (test_case.exp_pos >= 0) {
140 EXPECT_EQ(test_case.exp_pos, pos) << "test_case[" << idx << "]";
141 }
142 EXPECT_EQ(test_case.exp_match_len, match_len) << "test_case[" << idx << "]";
143 }
144 }
145
146 // "Big" test case on pseudorandom data, mainly to measure timing.
147 TEST(DivSufSortTest, Big) {
148 const int kSize = 1 << 24;
149 std::minstd_rand rand_gen;
150 std::unique_ptr<divsuf::sauchar_t> buf(new divsuf::sauchar_t[kSize]);
151 base::Time::EnableHighResolutionTimer(true);
152 base::Time::ActivateHighResolutionTimer(true);
153
154 std::fill(buf.get(), buf.get() + kSize, 0);
155 const int kBound = kSize / 3;
156 for (int i = 0; i < kBound; ++i)
157 buf.get()[i] = i & 0xFF;
158 std::shuffle(buf.get(), buf.get() + kSize, rand_gen);
159
160 PagedArray<divsuf::saidx_t> I;
161 ASSERT_TRUE(I.Allocate(kSize + 1));
162
163 base::Time t0 = base::Time::Now();
164 I[0] = kSize;
165 divsuf::divsufsort(buf.get(), I.begin() + 1, kSize);
166 base::TimeDelta dt = base::Time::Now() - t0;
167 fprintf(stderr, "%.3f s\n", dt.InSecondsF());
168
169 base::Time::ActivateHighResolutionTimer(false);
170 }
171
172 } // namespace courgette
OLDNEW
« no previous file with comments | « courgette/third_party/divsufsort/divsufsort_private.h ('k') | courgette/third_party/divsufsort/sssort.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698