OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/installer/zucchini/suffix_array.h" | |
6 | |
7 #include <string> | |
etiennep1
2017/06/27 18:07:09
#include <initializer_list>
huangs
2017/06/27 20:31:21
#include <cstddef> for size_t
etiennep1
2017/06/28 19:26:09
Done.
etiennep1
2017/06/28 19:26:09
Done.
| |
8 #include <vector> | |
9 | |
10 #include "testing/gtest/include/gtest/gtest.h" | |
11 | |
12 namespace zucchini { | |
13 | |
14 using ustring = std::basic_string<unsigned char>; | |
15 | |
16 ustring MakeUnsignedString(const std::string& str) { | |
17 return {str.begin(), str.end()}; | |
18 } | |
19 | |
20 template <class T> | |
21 std::vector<T> MakeVector(const std::initializer_list<T>& ilist) { | |
22 return {ilist.begin(), ilist.end()}; | |
23 } | |
24 | |
25 void TestSlPartition(std::initializer_list<Sais::SLType> expected_sl_partition, | |
26 std::initializer_list<size_t> expected_lms_indices, | |
27 std::string str) { | |
28 using SaisImpl = Sais::Implementation<size_t, size_t>; | |
29 | |
30 std::vector<Sais::SLType> sl_partition(str.size()); | |
31 EXPECT_EQ(expected_lms_indices.size(), | |
32 SaisImpl::BuildSLPartition(str.begin(), str.size(), 255, | |
33 sl_partition.rbegin())); | |
34 EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition); | |
35 | |
36 std::vector<size_t> lms_indices(expected_lms_indices.size()); | |
37 SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin()); | |
38 EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices); | |
39 } | |
40 | |
41 TEST(SaisTest, BuildSLPartition) { | |
42 TestSlPartition({}, {}, ""); | |
43 TestSlPartition( | |
44 { | |
45 Sais::LType, | |
46 }, | |
47 {}, "a"); | |
48 TestSlPartition( | |
49 { | |
50 Sais::LType, Sais::LType, | |
51 }, | |
52 {}, "ba"); | |
53 TestSlPartition( | |
54 { | |
55 Sais::SType, Sais::LType, | |
56 }, | |
57 {}, "ab"); | |
58 TestSlPartition( | |
59 { | |
60 Sais::SType, Sais::SType, Sais::LType, | |
61 }, | |
62 {}, "aab"); | |
63 TestSlPartition( | |
64 { | |
65 Sais::LType, Sais::LType, Sais::LType, | |
66 }, | |
67 {}, "bba"); | |
68 TestSlPartition( | |
69 { | |
70 Sais::LType, Sais::SType, Sais::LType, | |
71 }, | |
72 {1}, "bab"); | |
73 TestSlPartition( | |
74 { | |
75 Sais::LType, Sais::SType, Sais::SType, Sais::LType, | |
76 }, | |
77 {1}, "baab"); | |
78 | |
79 TestSlPartition( | |
80 { | |
81 Sais::LType, // zucchini | |
82 Sais::LType, // ucchini | |
83 Sais::SType, // cchini | |
84 Sais::SType, // chini | |
85 Sais::SType, // hini | |
86 Sais::SType, // ini | |
87 Sais::LType, // ni | |
88 Sais::LType, // i | |
89 }, | |
90 {2}, "zucchini"); | |
91 } | |
92 | |
93 std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str, | |
94 size_t max_key) { | |
95 using SaisImpl = Sais::Implementation<size_t, size_t>; | |
96 return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key); | |
97 } | |
98 | |
99 TEST(SaisTest, BucketCount) { | |
100 using vec = std::vector<size_t>; | |
101 | |
102 EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4)); | |
103 EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4)); | |
104 EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4)); | |
105 } | |
106 | |
107 std::vector<size_t> InducedSortSubstring(std::string str) { | |
108 using SaisImpl = Sais::Implementation<size_t, size_t>; | |
109 std::vector<Sais::SLType> sl_partition(str.size()); | |
110 size_t lms_count = SaisImpl::BuildSLPartition(str.begin(), str.size(), 255, | |
111 sl_partition.rbegin()); | |
112 std::vector<size_t> lms_indices(lms_count); | |
113 SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin()); | |
114 auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), 255); | |
115 | |
116 std::vector<size_t> suffix_array(str.size()); | |
117 SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets, | |
118 suffix_array.begin()); | |
119 | |
120 return suffix_array; | |
121 } | |
122 | |
123 TEST(SaisTest, InducedSortSubstring) { | |
124 using vec = std::vector<size_t>; | |
125 | |
126 // L; a$ | |
127 EXPECT_EQ(vec({0}), InducedSortSubstring("a")); | |
128 | |
129 // SL; ab$, b$ | |
130 EXPECT_EQ(vec({0, 1}), InducedSortSubstring("ab")); | |
131 | |
132 // LL; a$, ba$ | |
133 EXPECT_EQ(vec({1, 0}), InducedSortSubstring("ba")); | |
134 | |
135 // SLL; a$, aba$, ba$ | |
136 EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring("aba")); | |
137 | |
138 // LSL; ab$, b$, ba | |
139 EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring("bab")); | |
140 | |
141 // SSL; aab$, ab$, b$ | |
142 EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring("aab")); | |
143 | |
144 // LSSL; aab$, ab$, b$, ba | |
145 EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring("baab")); | |
146 } | |
147 | |
148 template <class Algorithm> | |
149 void TestSuffixSort(ustring test_str, Algorithm algorithm) { | |
150 std::vector<size_t> suffix_array = MakeSuffixArray(algorithm, test_str, 256); | |
151 EXPECT_EQ(test_str.size(), suffix_array.size()); | |
152 | |
153 // Expect that I[] is a permutation of [0, len]. | |
154 std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end()); | |
155 std::sort(sorted_suffix.begin(), sorted_suffix.end()); | |
156 for (size_t i = 0; i < test_str.size(); ++i) | |
157 EXPECT_EQ(i, sorted_suffix[i]); | |
158 | |
159 // Expect that all suffixes are strictly ordered. | |
160 auto end = test_str.end(); | |
161 for (size_t i = 1; i < test_str.size(); ++i) { | |
162 auto suf1 = test_str.begin() + suffix_array[i - 1]; | |
163 auto suf2 = test_str.begin() + suffix_array[i]; | |
164 bool is_less = std::lexicographical_compare(suf1, end, suf2, end); | |
165 EXPECT_TRUE(is_less); | |
166 } | |
167 } | |
168 | |
169 int TestSearchSuffixArray(std::string base_str, std::string search_str) { | |
170 ustring base_ustr = MakeUnsignedString(base_str); | |
171 ustring search_ustr = MakeUnsignedString(search_str); | |
172 | |
173 std::vector<size_t> suffix_array = | |
174 MakeSuffixArray(NaiveSuffixSort(), base_ustr, 256); | |
175 | |
176 auto pos = SearchSuffixArray(suffix_array, base_ustr.begin(), | |
177 search_ustr.begin(), search_ustr.end()); | |
178 return static_cast<int>(pos - suffix_array.begin()); | |
179 } | |
180 | |
181 constexpr const char* test_strs[] = { | |
182 "", | |
183 "a", | |
184 "aa", | |
185 "za", | |
186 "CACAO", | |
187 "aaaaa", | |
188 "banana", | |
189 "tobeornottobe", | |
190 "The quick brown fox jumps over the lazy dog.", | |
191 "elephantelephantelephantelephantelephant", | |
192 "walawalawashington", | |
193 "-------------------------", | |
194 "011010011001011010010110011010010", | |
195 "3141592653589793238462643383279502884197169399375105", | |
196 "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD", | |
197 "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba", | |
198 "0123456789876543210", | |
199 "9876543210123456789", | |
200 "aababcabcdabcdeabcdefabcdefg", | |
201 "asdhklgalksdjghalksdjghalksdjgh", | |
202 }; | |
203 | |
204 TEST(SuffixSortTest, NaiveSuffixSort) { | |
205 for (const std::string& test_str : test_strs) { | |
206 TestSuffixSort(MakeUnsignedString(test_str), NaiveSuffixSort()); | |
207 } | |
208 } | |
209 | |
210 TEST(SuffixSortTest, Sort) { | |
211 for (const std::string& test_str : test_strs) { | |
212 TestSuffixSort(MakeUnsignedString(test_str), Sais()); | |
213 } | |
214 } | |
215 | |
216 TEST(SuffixSortTest, Search) { | |
217 EXPECT_EQ(0, TestSearchSuffixArray("", "")); | |
218 EXPECT_EQ(0, TestSearchSuffixArray("", "a")); | |
219 EXPECT_EQ(0, TestSearchSuffixArray("b", "")); | |
220 EXPECT_EQ(0, TestSearchSuffixArray("b", "a")); | |
221 EXPECT_EQ(1, TestSearchSuffixArray("b", "c")); | |
222 EXPECT_EQ(1, TestSearchSuffixArray("b", "bc")); | |
223 EXPECT_EQ(0, TestSearchSuffixArray("aa", "a")); | |
224 EXPECT_EQ(1, TestSearchSuffixArray("aa", "aa")); | |
225 } | |
226 | |
huangs
2017/06/27 20:31:21
Please also adapt the more comprehensive test in
etiennep1
2017/06/28 19:26:09
I added more search unittest based on these test c
| |
227 } // namespace zucchini | |
OLD | NEW |