Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Side by Side Diff: chrome/installer/zucchini/suffix_array_unittest.cc

Issue 2963463002: [Zucchini] Generic suffix array algorithms. (Closed)
Patch Set: Make gcc happy Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/installer/zucchini/suffix_array.h"
6
7 #include <string>
etiennep1 2017/06/27 18:07:09 #include <initializer_list>
huangs 2017/06/27 20:31:21 #include <cstddef> for size_t
etiennep1 2017/06/28 19:26:09 Done.
etiennep1 2017/06/28 19:26:09 Done.
8 #include <vector>
9
10 #include "testing/gtest/include/gtest/gtest.h"
11
12 namespace zucchini {
13
14 using ustring = std::basic_string<unsigned char>;
15
16 ustring MakeUnsignedString(const std::string& str) {
17 return {str.begin(), str.end()};
18 }
19
20 template <class T>
21 std::vector<T> MakeVector(const std::initializer_list<T>& ilist) {
22 return {ilist.begin(), ilist.end()};
23 }
24
25 void TestSlPartition(std::initializer_list<Sais::SLType> expected_sl_partition,
26 std::initializer_list<size_t> expected_lms_indices,
27 std::string str) {
28 using SaisImpl = Sais::Implementation<size_t, size_t>;
29
30 std::vector<Sais::SLType> sl_partition(str.size());
31 EXPECT_EQ(expected_lms_indices.size(),
32 SaisImpl::BuildSLPartition(str.begin(), str.size(), 255,
33 sl_partition.rbegin()));
34 EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition);
35
36 std::vector<size_t> lms_indices(expected_lms_indices.size());
37 SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin());
38 EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices);
39 }
40
41 TEST(SaisTest, BuildSLPartition) {
42 TestSlPartition({}, {}, "");
43 TestSlPartition(
44 {
45 Sais::LType,
46 },
47 {}, "a");
48 TestSlPartition(
49 {
50 Sais::LType, Sais::LType,
51 },
52 {}, "ba");
53 TestSlPartition(
54 {
55 Sais::SType, Sais::LType,
56 },
57 {}, "ab");
58 TestSlPartition(
59 {
60 Sais::SType, Sais::SType, Sais::LType,
61 },
62 {}, "aab");
63 TestSlPartition(
64 {
65 Sais::LType, Sais::LType, Sais::LType,
66 },
67 {}, "bba");
68 TestSlPartition(
69 {
70 Sais::LType, Sais::SType, Sais::LType,
71 },
72 {1}, "bab");
73 TestSlPartition(
74 {
75 Sais::LType, Sais::SType, Sais::SType, Sais::LType,
76 },
77 {1}, "baab");
78
79 TestSlPartition(
80 {
81 Sais::LType, // zucchini
82 Sais::LType, // ucchini
83 Sais::SType, // cchini
84 Sais::SType, // chini
85 Sais::SType, // hini
86 Sais::SType, // ini
87 Sais::LType, // ni
88 Sais::LType, // i
89 },
90 {2}, "zucchini");
91 }
92
93 std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str,
94 size_t max_key) {
95 using SaisImpl = Sais::Implementation<size_t, size_t>;
96 return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key);
97 }
98
99 TEST(SaisTest, BucketCount) {
100 using vec = std::vector<size_t>;
101
102 EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4));
103 EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4));
104 EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4));
105 }
106
107 std::vector<size_t> InducedSortSubstring(std::string str) {
108 using SaisImpl = Sais::Implementation<size_t, size_t>;
109 std::vector<Sais::SLType> sl_partition(str.size());
110 size_t lms_count = SaisImpl::BuildSLPartition(str.begin(), str.size(), 255,
111 sl_partition.rbegin());
112 std::vector<size_t> lms_indices(lms_count);
113 SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin());
114 auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), 255);
115
116 std::vector<size_t> suffix_array(str.size());
117 SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets,
118 suffix_array.begin());
119
120 return suffix_array;
121 }
122
123 TEST(SaisTest, InducedSortSubstring) {
124 using vec = std::vector<size_t>;
125
126 // L; a$
127 EXPECT_EQ(vec({0}), InducedSortSubstring("a"));
128
129 // SL; ab$, b$
130 EXPECT_EQ(vec({0, 1}), InducedSortSubstring("ab"));
131
132 // LL; a$, ba$
133 EXPECT_EQ(vec({1, 0}), InducedSortSubstring("ba"));
134
135 // SLL; a$, aba$, ba$
136 EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring("aba"));
137
138 // LSL; ab$, b$, ba
139 EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring("bab"));
140
141 // SSL; aab$, ab$, b$
142 EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring("aab"));
143
144 // LSSL; aab$, ab$, b$, ba
145 EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring("baab"));
146 }
147
148 template <class Algorithm>
149 void TestSuffixSort(ustring test_str, Algorithm algorithm) {
150 std::vector<size_t> suffix_array = MakeSuffixArray(algorithm, test_str, 256);
151 EXPECT_EQ(test_str.size(), suffix_array.size());
152
153 // Expect that I[] is a permutation of [0, len].
154 std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end());
155 std::sort(sorted_suffix.begin(), sorted_suffix.end());
156 for (size_t i = 0; i < test_str.size(); ++i)
157 EXPECT_EQ(i, sorted_suffix[i]);
158
159 // Expect that all suffixes are strictly ordered.
160 auto end = test_str.end();
161 for (size_t i = 1; i < test_str.size(); ++i) {
162 auto suf1 = test_str.begin() + suffix_array[i - 1];
163 auto suf2 = test_str.begin() + suffix_array[i];
164 bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
165 EXPECT_TRUE(is_less);
166 }
167 }
168
169 int TestSearchSuffixArray(std::string base_str, std::string search_str) {
170 ustring base_ustr = MakeUnsignedString(base_str);
171 ustring search_ustr = MakeUnsignedString(search_str);
172
173 std::vector<size_t> suffix_array =
174 MakeSuffixArray(NaiveSuffixSort(), base_ustr, 256);
175
176 auto pos = SearchSuffixArray(suffix_array, base_ustr.begin(),
177 search_ustr.begin(), search_ustr.end());
178 return static_cast<int>(pos - suffix_array.begin());
179 }
180
181 constexpr const char* test_strs[] = {
182 "",
183 "a",
184 "aa",
185 "za",
186 "CACAO",
187 "aaaaa",
188 "banana",
189 "tobeornottobe",
190 "The quick brown fox jumps over the lazy dog.",
191 "elephantelephantelephantelephantelephant",
192 "walawalawashington",
193 "-------------------------",
194 "011010011001011010010110011010010",
195 "3141592653589793238462643383279502884197169399375105",
196 "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
197 "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba",
198 "0123456789876543210",
199 "9876543210123456789",
200 "aababcabcdabcdeabcdefabcdefg",
201 "asdhklgalksdjghalksdjghalksdjgh",
202 };
203
204 TEST(SuffixSortTest, NaiveSuffixSort) {
205 for (const std::string& test_str : test_strs) {
206 TestSuffixSort(MakeUnsignedString(test_str), NaiveSuffixSort());
207 }
208 }
209
210 TEST(SuffixSortTest, Sort) {
211 for (const std::string& test_str : test_strs) {
212 TestSuffixSort(MakeUnsignedString(test_str), Sais());
213 }
214 }
215
216 TEST(SuffixSortTest, Search) {
217 EXPECT_EQ(0, TestSearchSuffixArray("", ""));
218 EXPECT_EQ(0, TestSearchSuffixArray("", "a"));
219 EXPECT_EQ(0, TestSearchSuffixArray("b", ""));
220 EXPECT_EQ(0, TestSearchSuffixArray("b", "a"));
221 EXPECT_EQ(1, TestSearchSuffixArray("b", "c"));
222 EXPECT_EQ(1, TestSearchSuffixArray("b", "bc"));
223 EXPECT_EQ(0, TestSearchSuffixArray("aa", "a"));
224 EXPECT_EQ(1, TestSearchSuffixArray("aa", "aa"));
225 }
226
huangs 2017/06/27 20:31:21 Please also adapt the more comprehensive test in
etiennep1 2017/06/28 19:26:09 I added more search unittest based on these test c
227 } // namespace zucchini
OLDNEW
« chrome/installer/zucchini/suffix_array.h ('K') | « chrome/installer/zucchini/suffix_array.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698