Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7339)

Unified Diff: chrome/installer/zucchini/suffix_array_unittest.cc

Issue 2963463002: [Zucchini] Generic suffix array algorithms. (Closed)
Patch Set: Make gcc happy Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/installer/zucchini/suffix_array_unittest.cc
diff --git a/chrome/installer/zucchini/suffix_array_unittest.cc b/chrome/installer/zucchini/suffix_array_unittest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..93335b9915c0f3ce5cd91c94459e284836a6c305
--- /dev/null
+++ b/chrome/installer/zucchini/suffix_array_unittest.cc
@@ -0,0 +1,227 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/installer/zucchini/suffix_array.h"
+
+#include <string>
etiennep1 2017/06/27 18:07:09 #include <initializer_list>
huangs 2017/06/27 20:31:21 #include <cstddef> for size_t
etiennep1 2017/06/28 19:26:09 Done.
etiennep1 2017/06/28 19:26:09 Done.
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+using ustring = std::basic_string<unsigned char>;
+
+ustring MakeUnsignedString(const std::string& str) {
+ return {str.begin(), str.end()};
+}
+
+template <class T>
+std::vector<T> MakeVector(const std::initializer_list<T>& ilist) {
+ return {ilist.begin(), ilist.end()};
+}
+
+void TestSlPartition(std::initializer_list<Sais::SLType> expected_sl_partition,
+ std::initializer_list<size_t> expected_lms_indices,
+ std::string str) {
+ using SaisImpl = Sais::Implementation<size_t, size_t>;
+
+ std::vector<Sais::SLType> sl_partition(str.size());
+ EXPECT_EQ(expected_lms_indices.size(),
+ SaisImpl::BuildSLPartition(str.begin(), str.size(), 255,
+ sl_partition.rbegin()));
+ EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition);
+
+ std::vector<size_t> lms_indices(expected_lms_indices.size());
+ SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin());
+ EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices);
+}
+
+TEST(SaisTest, BuildSLPartition) {
+ TestSlPartition({}, {}, "");
+ TestSlPartition(
+ {
+ Sais::LType,
+ },
+ {}, "a");
+ TestSlPartition(
+ {
+ Sais::LType, Sais::LType,
+ },
+ {}, "ba");
+ TestSlPartition(
+ {
+ Sais::SType, Sais::LType,
+ },
+ {}, "ab");
+ TestSlPartition(
+ {
+ Sais::SType, Sais::SType, Sais::LType,
+ },
+ {}, "aab");
+ TestSlPartition(
+ {
+ Sais::LType, Sais::LType, Sais::LType,
+ },
+ {}, "bba");
+ TestSlPartition(
+ {
+ Sais::LType, Sais::SType, Sais::LType,
+ },
+ {1}, "bab");
+ TestSlPartition(
+ {
+ Sais::LType, Sais::SType, Sais::SType, Sais::LType,
+ },
+ {1}, "baab");
+
+ TestSlPartition(
+ {
+ Sais::LType, // zucchini
+ Sais::LType, // ucchini
+ Sais::SType, // cchini
+ Sais::SType, // chini
+ Sais::SType, // hini
+ Sais::SType, // ini
+ Sais::LType, // ni
+ Sais::LType, // i
+ },
+ {2}, "zucchini");
+}
+
+std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str,
+ size_t max_key) {
+ using SaisImpl = Sais::Implementation<size_t, size_t>;
+ return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key);
+}
+
+TEST(SaisTest, BucketCount) {
+ using vec = std::vector<size_t>;
+
+ EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4));
+ EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4));
+ EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4));
+}
+
+std::vector<size_t> InducedSortSubstring(std::string str) {
+ using SaisImpl = Sais::Implementation<size_t, size_t>;
+ std::vector<Sais::SLType> sl_partition(str.size());
+ size_t lms_count = SaisImpl::BuildSLPartition(str.begin(), str.size(), 255,
+ sl_partition.rbegin());
+ std::vector<size_t> lms_indices(lms_count);
+ SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin());
+ auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), 255);
+
+ std::vector<size_t> suffix_array(str.size());
+ SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets,
+ suffix_array.begin());
+
+ return suffix_array;
+}
+
+TEST(SaisTest, InducedSortSubstring) {
+ using vec = std::vector<size_t>;
+
+ // L; a$
+ EXPECT_EQ(vec({0}), InducedSortSubstring("a"));
+
+ // SL; ab$, b$
+ EXPECT_EQ(vec({0, 1}), InducedSortSubstring("ab"));
+
+ // LL; a$, ba$
+ EXPECT_EQ(vec({1, 0}), InducedSortSubstring("ba"));
+
+ // SLL; a$, aba$, ba$
+ EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring("aba"));
+
+ // LSL; ab$, b$, ba
+ EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring("bab"));
+
+ // SSL; aab$, ab$, b$
+ EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring("aab"));
+
+ // LSSL; aab$, ab$, b$, ba
+ EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring("baab"));
+}
+
+template <class Algorithm>
+void TestSuffixSort(ustring test_str, Algorithm algorithm) {
+ std::vector<size_t> suffix_array = MakeSuffixArray(algorithm, test_str, 256);
+ EXPECT_EQ(test_str.size(), suffix_array.size());
+
+ // Expect that I[] is a permutation of [0, len].
+ std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end());
+ std::sort(sorted_suffix.begin(), sorted_suffix.end());
+ for (size_t i = 0; i < test_str.size(); ++i)
+ EXPECT_EQ(i, sorted_suffix[i]);
+
+ // Expect that all suffixes are strictly ordered.
+ auto end = test_str.end();
+ for (size_t i = 1; i < test_str.size(); ++i) {
+ auto suf1 = test_str.begin() + suffix_array[i - 1];
+ auto suf2 = test_str.begin() + suffix_array[i];
+ bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
+ EXPECT_TRUE(is_less);
+ }
+}
+
+int TestSearchSuffixArray(std::string base_str, std::string search_str) {
+ ustring base_ustr = MakeUnsignedString(base_str);
+ ustring search_ustr = MakeUnsignedString(search_str);
+
+ std::vector<size_t> suffix_array =
+ MakeSuffixArray(NaiveSuffixSort(), base_ustr, 256);
+
+ auto pos = SearchSuffixArray(suffix_array, base_ustr.begin(),
+ search_ustr.begin(), search_ustr.end());
+ return static_cast<int>(pos - suffix_array.begin());
+}
+
+constexpr const char* test_strs[] = {
+ "",
+ "a",
+ "aa",
+ "za",
+ "CACAO",
+ "aaaaa",
+ "banana",
+ "tobeornottobe",
+ "The quick brown fox jumps over the lazy dog.",
+ "elephantelephantelephantelephantelephant",
+ "walawalawashington",
+ "-------------------------",
+ "011010011001011010010110011010010",
+ "3141592653589793238462643383279502884197169399375105",
+ "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
+ "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba",
+ "0123456789876543210",
+ "9876543210123456789",
+ "aababcabcdabcdeabcdefabcdefg",
+ "asdhklgalksdjghalksdjghalksdjgh",
+};
+
+TEST(SuffixSortTest, NaiveSuffixSort) {
+ for (const std::string& test_str : test_strs) {
+ TestSuffixSort(MakeUnsignedString(test_str), NaiveSuffixSort());
+ }
+}
+
+TEST(SuffixSortTest, Sort) {
+ for (const std::string& test_str : test_strs) {
+ TestSuffixSort(MakeUnsignedString(test_str), Sais());
+ }
+}
+
+TEST(SuffixSortTest, Search) {
+ EXPECT_EQ(0, TestSearchSuffixArray("", ""));
+ EXPECT_EQ(0, TestSearchSuffixArray("", "a"));
+ EXPECT_EQ(0, TestSearchSuffixArray("b", ""));
+ EXPECT_EQ(0, TestSearchSuffixArray("b", "a"));
+ EXPECT_EQ(1, TestSearchSuffixArray("b", "c"));
+ EXPECT_EQ(1, TestSearchSuffixArray("b", "bc"));
+ EXPECT_EQ(0, TestSearchSuffixArray("aa", "a"));
+ EXPECT_EQ(1, TestSearchSuffixArray("aa", "aa"));
+}
+
huangs 2017/06/27 20:31:21 Please also adapt the more comprehensive test in
etiennep1 2017/06/28 19:26:09 I added more search unittest based on these test c
+} // namespace zucchini
« chrome/installer/zucchini/suffix_array.h ('K') | « chrome/installer/zucchini/suffix_array.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698