Index: chrome/installer/zucchini/suffix_array_unittest.cc |
diff --git a/chrome/installer/zucchini/suffix_array_unittest.cc b/chrome/installer/zucchini/suffix_array_unittest.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..93335b9915c0f3ce5cd91c94459e284836a6c305 |
--- /dev/null |
+++ b/chrome/installer/zucchini/suffix_array_unittest.cc |
@@ -0,0 +1,227 @@ |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "chrome/installer/zucchini/suffix_array.h" |
+ |
+#include <string> |
etiennep1
2017/06/27 18:07:09
#include <initializer_list>
huangs
2017/06/27 20:31:21
#include <cstddef> for size_t
etiennep1
2017/06/28 19:26:09
Done.
etiennep1
2017/06/28 19:26:09
Done.
|
+#include <vector> |
+ |
+#include "testing/gtest/include/gtest/gtest.h" |
+ |
+namespace zucchini { |
+ |
+using ustring = std::basic_string<unsigned char>; |
+ |
+ustring MakeUnsignedString(const std::string& str) { |
+ return {str.begin(), str.end()}; |
+} |
+ |
+template <class T> |
+std::vector<T> MakeVector(const std::initializer_list<T>& ilist) { |
+ return {ilist.begin(), ilist.end()}; |
+} |
+ |
+void TestSlPartition(std::initializer_list<Sais::SLType> expected_sl_partition, |
+ std::initializer_list<size_t> expected_lms_indices, |
+ std::string str) { |
+ using SaisImpl = Sais::Implementation<size_t, size_t>; |
+ |
+ std::vector<Sais::SLType> sl_partition(str.size()); |
+ EXPECT_EQ(expected_lms_indices.size(), |
+ SaisImpl::BuildSLPartition(str.begin(), str.size(), 255, |
+ sl_partition.rbegin())); |
+ EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition); |
+ |
+ std::vector<size_t> lms_indices(expected_lms_indices.size()); |
+ SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin()); |
+ EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices); |
+} |
+ |
+TEST(SaisTest, BuildSLPartition) { |
+ TestSlPartition({}, {}, ""); |
+ TestSlPartition( |
+ { |
+ Sais::LType, |
+ }, |
+ {}, "a"); |
+ TestSlPartition( |
+ { |
+ Sais::LType, Sais::LType, |
+ }, |
+ {}, "ba"); |
+ TestSlPartition( |
+ { |
+ Sais::SType, Sais::LType, |
+ }, |
+ {}, "ab"); |
+ TestSlPartition( |
+ { |
+ Sais::SType, Sais::SType, Sais::LType, |
+ }, |
+ {}, "aab"); |
+ TestSlPartition( |
+ { |
+ Sais::LType, Sais::LType, Sais::LType, |
+ }, |
+ {}, "bba"); |
+ TestSlPartition( |
+ { |
+ Sais::LType, Sais::SType, Sais::LType, |
+ }, |
+ {1}, "bab"); |
+ TestSlPartition( |
+ { |
+ Sais::LType, Sais::SType, Sais::SType, Sais::LType, |
+ }, |
+ {1}, "baab"); |
+ |
+ TestSlPartition( |
+ { |
+ Sais::LType, // zucchini |
+ Sais::LType, // ucchini |
+ Sais::SType, // cchini |
+ Sais::SType, // chini |
+ Sais::SType, // hini |
+ Sais::SType, // ini |
+ Sais::LType, // ni |
+ Sais::LType, // i |
+ }, |
+ {2}, "zucchini"); |
+} |
+ |
+std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str, |
+ size_t max_key) { |
+ using SaisImpl = Sais::Implementation<size_t, size_t>; |
+ return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key); |
+} |
+ |
+TEST(SaisTest, BucketCount) { |
+ using vec = std::vector<size_t>; |
+ |
+ EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4)); |
+ EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4)); |
+ EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4)); |
+} |
+ |
+std::vector<size_t> InducedSortSubstring(std::string str) { |
+ using SaisImpl = Sais::Implementation<size_t, size_t>; |
+ std::vector<Sais::SLType> sl_partition(str.size()); |
+ size_t lms_count = SaisImpl::BuildSLPartition(str.begin(), str.size(), 255, |
+ sl_partition.rbegin()); |
+ std::vector<size_t> lms_indices(lms_count); |
+ SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin()); |
+ auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), 255); |
+ |
+ std::vector<size_t> suffix_array(str.size()); |
+ SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets, |
+ suffix_array.begin()); |
+ |
+ return suffix_array; |
+} |
+ |
+TEST(SaisTest, InducedSortSubstring) { |
+ using vec = std::vector<size_t>; |
+ |
+ // L; a$ |
+ EXPECT_EQ(vec({0}), InducedSortSubstring("a")); |
+ |
+ // SL; ab$, b$ |
+ EXPECT_EQ(vec({0, 1}), InducedSortSubstring("ab")); |
+ |
+ // LL; a$, ba$ |
+ EXPECT_EQ(vec({1, 0}), InducedSortSubstring("ba")); |
+ |
+ // SLL; a$, aba$, ba$ |
+ EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring("aba")); |
+ |
+ // LSL; ab$, b$, ba |
+ EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring("bab")); |
+ |
+ // SSL; aab$, ab$, b$ |
+ EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring("aab")); |
+ |
+ // LSSL; aab$, ab$, b$, ba |
+ EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring("baab")); |
+} |
+ |
+template <class Algorithm> |
+void TestSuffixSort(ustring test_str, Algorithm algorithm) { |
+ std::vector<size_t> suffix_array = MakeSuffixArray(algorithm, test_str, 256); |
+ EXPECT_EQ(test_str.size(), suffix_array.size()); |
+ |
+ // Expect that I[] is a permutation of [0, len]. |
+ std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end()); |
+ std::sort(sorted_suffix.begin(), sorted_suffix.end()); |
+ for (size_t i = 0; i < test_str.size(); ++i) |
+ EXPECT_EQ(i, sorted_suffix[i]); |
+ |
+ // Expect that all suffixes are strictly ordered. |
+ auto end = test_str.end(); |
+ for (size_t i = 1; i < test_str.size(); ++i) { |
+ auto suf1 = test_str.begin() + suffix_array[i - 1]; |
+ auto suf2 = test_str.begin() + suffix_array[i]; |
+ bool is_less = std::lexicographical_compare(suf1, end, suf2, end); |
+ EXPECT_TRUE(is_less); |
+ } |
+} |
+ |
+int TestSearchSuffixArray(std::string base_str, std::string search_str) { |
+ ustring base_ustr = MakeUnsignedString(base_str); |
+ ustring search_ustr = MakeUnsignedString(search_str); |
+ |
+ std::vector<size_t> suffix_array = |
+ MakeSuffixArray(NaiveSuffixSort(), base_ustr, 256); |
+ |
+ auto pos = SearchSuffixArray(suffix_array, base_ustr.begin(), |
+ search_ustr.begin(), search_ustr.end()); |
+ return static_cast<int>(pos - suffix_array.begin()); |
+} |
+ |
+constexpr const char* test_strs[] = { |
+ "", |
+ "a", |
+ "aa", |
+ "za", |
+ "CACAO", |
+ "aaaaa", |
+ "banana", |
+ "tobeornottobe", |
+ "The quick brown fox jumps over the lazy dog.", |
+ "elephantelephantelephantelephantelephant", |
+ "walawalawashington", |
+ "-------------------------", |
+ "011010011001011010010110011010010", |
+ "3141592653589793238462643383279502884197169399375105", |
+ "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD", |
+ "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba", |
+ "0123456789876543210", |
+ "9876543210123456789", |
+ "aababcabcdabcdeabcdefabcdefg", |
+ "asdhklgalksdjghalksdjghalksdjgh", |
+}; |
+ |
+TEST(SuffixSortTest, NaiveSuffixSort) { |
+ for (const std::string& test_str : test_strs) { |
+ TestSuffixSort(MakeUnsignedString(test_str), NaiveSuffixSort()); |
+ } |
+} |
+ |
+TEST(SuffixSortTest, Sort) { |
+ for (const std::string& test_str : test_strs) { |
+ TestSuffixSort(MakeUnsignedString(test_str), Sais()); |
+ } |
+} |
+ |
+TEST(SuffixSortTest, Search) { |
+ EXPECT_EQ(0, TestSearchSuffixArray("", "")); |
+ EXPECT_EQ(0, TestSearchSuffixArray("", "a")); |
+ EXPECT_EQ(0, TestSearchSuffixArray("b", "")); |
+ EXPECT_EQ(0, TestSearchSuffixArray("b", "a")); |
+ EXPECT_EQ(1, TestSearchSuffixArray("b", "c")); |
+ EXPECT_EQ(1, TestSearchSuffixArray("b", "bc")); |
+ EXPECT_EQ(0, TestSearchSuffixArray("aa", "a")); |
+ EXPECT_EQ(1, TestSearchSuffixArray("aa", "aa")); |
+} |
+ |
huangs
2017/06/27 20:31:21
Please also adapt the more comprehensive test in
etiennep1
2017/06/28 19:26:09
I added more search unittest based on these test c
|
+} // namespace zucchini |