| Index: chrome/installer/zucchini/suffix_array_unittest.cc
|
| diff --git a/chrome/installer/zucchini/suffix_array_unittest.cc b/chrome/installer/zucchini/suffix_array_unittest.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..fff4df8be63e82b5fea75bd7a21d46415f387b2f
|
| --- /dev/null
|
| +++ b/chrome/installer/zucchini/suffix_array_unittest.cc
|
| @@ -0,0 +1,323 @@
|
| +// Copyright 2017 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "chrome/installer/zucchini/suffix_array.h"
|
| +
|
| +#include <algorithm>
|
| +#include <cstddef>
|
| +#include <initializer_list>
|
| +#include <string>
|
| +#include <vector>
|
| +
|
| +#include "testing/gtest/include/gtest/gtest.h"
|
| +
|
| +namespace zucchini {
|
| +
|
| +using ustring = std::basic_string<unsigned char>;
|
| +
|
| +constexpr uint16_t kNumChar = 256;
|
| +
|
| +ustring MakeUnsignedString(const std::string& str) {
|
| + return {str.begin(), str.end()};
|
| +}
|
| +
|
| +template <class T>
|
| +std::vector<T> MakeVector(const std::initializer_list<T>& ilist) {
|
| + return {ilist.begin(), ilist.end()};
|
| +}
|
| +
|
| +void TestSlPartition(std::initializer_list<Sais::SLType> expected_sl_partition,
|
| + std::initializer_list<size_t> expected_lms_indices,
|
| + std::string str) {
|
| + using SaisImpl = Sais::Implementation<size_t, uint16_t>;
|
| +
|
| + std::vector<Sais::SLType> sl_partition(str.size());
|
| + EXPECT_EQ(expected_lms_indices.size(),
|
| + SaisImpl::BuildSLPartition(str.begin(), str.size(), kNumChar,
|
| + sl_partition.rbegin()));
|
| + EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition);
|
| +
|
| + std::vector<size_t> lms_indices(expected_lms_indices.size());
|
| + SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin());
|
| + EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices);
|
| +}
|
| +
|
| +TEST(SaisTest, BuildSLPartition) {
|
| + TestSlPartition({}, {}, "");
|
| + TestSlPartition(
|
| + {
|
| + Sais::LType,
|
| + },
|
| + {}, "a");
|
| + TestSlPartition(
|
| + {
|
| + Sais::LType, Sais::LType,
|
| + },
|
| + {}, "ba");
|
| + TestSlPartition(
|
| + {
|
| + Sais::SType, Sais::LType,
|
| + },
|
| + {}, "ab");
|
| + TestSlPartition(
|
| + {
|
| + Sais::SType, Sais::SType, Sais::LType,
|
| + },
|
| + {}, "aab");
|
| + TestSlPartition(
|
| + {
|
| + Sais::LType, Sais::LType, Sais::LType,
|
| + },
|
| + {}, "bba");
|
| + TestSlPartition(
|
| + {
|
| + Sais::LType, Sais::SType, Sais::LType,
|
| + },
|
| + {1}, "bab");
|
| + TestSlPartition(
|
| + {
|
| + Sais::LType, Sais::SType, Sais::SType, Sais::LType,
|
| + },
|
| + {1}, "baab");
|
| +
|
| + TestSlPartition(
|
| + {
|
| + Sais::LType, // zucchini
|
| + Sais::LType, // ucchini
|
| + Sais::SType, // cchini
|
| + Sais::SType, // chini
|
| + Sais::SType, // hini
|
| + Sais::SType, // ini
|
| + Sais::LType, // ni
|
| + Sais::LType, // i
|
| + },
|
| + {2}, "zucchini");
|
| +}
|
| +
|
| +std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str,
|
| + uint16_t max_key) {
|
| + using SaisImpl = Sais::Implementation<size_t, uint16_t>;
|
| + return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key);
|
| +}
|
| +
|
| +TEST(SaisTest, BucketCount) {
|
| + using vec = std::vector<size_t>;
|
| +
|
| + EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4));
|
| + EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4));
|
| + EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4));
|
| +}
|
| +
|
| +std::vector<size_t> InducedSortSubstring(ustring str) {
|
| + using SaisImpl = Sais::Implementation<size_t, uint16_t>;
|
| + std::vector<Sais::SLType> sl_partition(str.size());
|
| + size_t lms_count = SaisImpl::BuildSLPartition(
|
| + str.begin(), str.size(), kNumChar, sl_partition.rbegin());
|
| + std::vector<size_t> lms_indices(lms_count);
|
| + SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin());
|
| + auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), kNumChar);
|
| +
|
| + std::vector<size_t> suffix_array(str.size());
|
| + SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets,
|
| + suffix_array.begin());
|
| +
|
| + return suffix_array;
|
| +}
|
| +
|
| +TEST(SaisTest, InducedSortSubstring) {
|
| + using vec = std::vector<size_t>;
|
| +
|
| + auto us = MakeUnsignedString;
|
| +
|
| + // L; a$
|
| + EXPECT_EQ(vec({0}), InducedSortSubstring(us("a")));
|
| +
|
| + // SL; ab$, b$
|
| + EXPECT_EQ(vec({0, 1}), InducedSortSubstring(us("ab")));
|
| +
|
| + // LL; a$, ba$
|
| + EXPECT_EQ(vec({1, 0}), InducedSortSubstring(us("ba")));
|
| +
|
| + // SLL; a$, aba$, ba$
|
| + EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring(us("aba")));
|
| +
|
| + // LSL; ab$, b$, ba
|
| + EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring(us("bab")));
|
| +
|
| + // SSL; aab$, ab$, b$
|
| + EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring(us("aab")));
|
| +
|
| + // LSSL; aab$, ab$, b$, ba
|
| + EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring(us("baab")));
|
| +}
|
| +
|
| +template <class Algorithm>
|
| +void TestSuffixSort(ustring test_str) {
|
| + std::vector<size_t> suffix_array =
|
| + MakeSuffixArray<Algorithm>(test_str, kNumChar);
|
| + EXPECT_EQ(test_str.size(), suffix_array.size());
|
| +
|
| + // Expect that I[] is a permutation of [0, len].
|
| + std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end());
|
| + std::sort(sorted_suffix.begin(), sorted_suffix.end());
|
| + for (size_t i = 0; i < test_str.size(); ++i)
|
| + EXPECT_EQ(i, sorted_suffix[i]);
|
| +
|
| + // Expect that all suffixes are strictly ordered.
|
| + auto end = test_str.end();
|
| + for (size_t i = 1; i < test_str.size(); ++i) {
|
| + auto suf1 = test_str.begin() + suffix_array[i - 1];
|
| + auto suf2 = test_str.begin() + suffix_array[i];
|
| + bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
|
| + EXPECT_TRUE(is_less);
|
| + }
|
| +}
|
| +
|
| +constexpr const char* test_strs[] = {
|
| + "",
|
| + "a",
|
| + "aa",
|
| + "za",
|
| + "CACAO",
|
| + "aaaaa",
|
| + "banana",
|
| + "tobeornottobe",
|
| + "The quick brown fox jumps over the lazy dog.",
|
| + "elephantelephantelephantelephantelephant",
|
| + "walawalawashington",
|
| + "-------------------------",
|
| + "011010011001011010010110011010010",
|
| + "3141592653589793238462643383279502884197169399375105",
|
| + "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
|
| + "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba",
|
| + "0123456789876543210",
|
| + "9876543210123456789",
|
| + "aababcabcdabcdeabcdefabcdefg",
|
| + "asdhklgalksdjghalksdjghalksdjgh",
|
| +};
|
| +
|
| +TEST(SuffixSortTest, NaiveSuffixSort) {
|
| + for (const std::string& test_str : test_strs) {
|
| + TestSuffixSort<NaiveSuffixSort>(MakeUnsignedString(test_str));
|
| + }
|
| +}
|
| +
|
| +TEST(SuffixSortTest, SaisSort) {
|
| + for (const std::string& test_str : test_strs) {
|
| + TestSuffixSort<Sais>(MakeUnsignedString(test_str));
|
| + }
|
| +}
|
| +
|
| +// Test with sequence that has every character.
|
| +TEST(SuffixSortTest, AllChar) {
|
| + std::vector<unsigned char> all_char(kNumChar);
|
| + std::iota(all_char.begin(), all_char.end(), 0);
|
| +
|
| + {
|
| + std::vector<size_t> suffix_array =
|
| + MakeSuffixArray<Sais>(all_char, kNumChar);
|
| + for (size_t i = 0; i < kNumChar; ++i)
|
| + EXPECT_EQ(i, suffix_array[i]);
|
| + }
|
| +
|
| + std::vector<unsigned char> all_char_reverse(all_char.rbegin(),
|
| + all_char.rend());
|
| + {
|
| + std::vector<size_t> suffix_array =
|
| + MakeSuffixArray<Sais>(all_char_reverse, kNumChar);
|
| + for (size_t i = 0; i < kNumChar; ++i)
|
| + EXPECT_EQ(kNumChar - i - 1, suffix_array[i]);
|
| + }
|
| +}
|
| +
|
| +void TestSuffixLowerBound(ustring base_str, ustring search_str) {
|
| + std::vector<size_t> suffix_array =
|
| + MakeSuffixArray<NaiveSuffixSort>(base_str, kNumChar);
|
| +
|
| + auto pos = SuffixLowerBound(suffix_array, base_str.begin(),
|
| + search_str.begin(), search_str.end());
|
| +
|
| + auto end = base_str.end();
|
| + if (pos != suffix_array.begin()) {
|
| + // Previous suffix is less than |search_str|.
|
| + auto suf = base_str.begin() + pos[-1];
|
| + bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
|
| + search_str.end());
|
| + EXPECT_TRUE(is_less);
|
| + }
|
| + if (pos != suffix_array.end()) {
|
| + // Current suffix is greater of equal to |search_str|.
|
| + auto suf = base_str.begin() + *pos;
|
| + bool is_less = std::lexicographical_compare(suf, end, search_str.begin(),
|
| + search_str.end());
|
| + EXPECT_FALSE(is_less);
|
| + }
|
| +}
|
| +
|
| +TEST(SuffixArrayTest, LowerBound) {
|
| + auto us = MakeUnsignedString;
|
| +
|
| + TestSuffixLowerBound(us(""), us(""));
|
| + TestSuffixLowerBound(us(""), us("a"));
|
| + TestSuffixLowerBound(us("b"), us(""));
|
| + TestSuffixLowerBound(us("b"), us("a"));
|
| + TestSuffixLowerBound(us("b"), us("c"));
|
| + TestSuffixLowerBound(us("b"), us("bc"));
|
| + TestSuffixLowerBound(us("aa"), us("a"));
|
| + TestSuffixLowerBound(us("aa"), us("aa"));
|
| +
|
| + ustring sentence = us("the quick brown fox jumps over the lazy dog.");
|
| + // Entire string: exact and unique.
|
| + TestSuffixLowerBound(sentence, sentence);
|
| + // Empty string: exact and non-unique.
|
| + TestSuffixLowerBound(sentence, us(""));
|
| + // Exact and unique suffix matches.
|
| + TestSuffixLowerBound(sentence, us("."));
|
| + TestSuffixLowerBound(sentence, us("the lazy dog."));
|
| + // Exact and unique non-suffix matches.
|
| + TestSuffixLowerBound(sentence, us("quick"));
|
| + TestSuffixLowerBound(sentence, us("the quick"));
|
| + // Partial and unique matches.
|
| + TestSuffixLowerBound(sentence, us("fox jumps with the hosps"));
|
| + TestSuffixLowerBound(sentence, us("xyz"));
|
| + // Exact and non-unique match: take lexicographical first.
|
| + TestSuffixLowerBound(sentence, us("the"));
|
| + TestSuffixLowerBound(sentence, us(" "));
|
| + // Partial and non-unique match.
|
| + // query < "the l"... < "the q"...
|
| + TestSuffixLowerBound(sentence, us("the apple"));
|
| + // "the l"... < query < "the q"...
|
| + TestSuffixLowerBound(sentence, us("the opera"));
|
| + // "the l"... < "the q"... < query
|
| + TestSuffixLowerBound(sentence, us("the zebra"));
|
| + // Prefix match dominates suffix match (unique).
|
| + TestSuffixLowerBound(sentence, us("over quick brown fox"));
|
| + // Empty matchs.
|
| + TestSuffixLowerBound(sentence, us(","));
|
| + TestSuffixLowerBound(sentence, us("1234"));
|
| + TestSuffixLowerBound(sentence, us("THE QUICK BROWN FOX"));
|
| + TestSuffixLowerBound(sentence, us("(the"));
|
| +}
|
| +
|
| +TEST(SuffixArrayTest, LowerBoundExact) {
|
| + for (const std::string& test_str : test_strs) {
|
| + ustring test_ustr = MakeUnsignedString(test_str);
|
| +
|
| + std::vector<size_t> suffix_array =
|
| + MakeSuffixArray<Sais>(test_ustr, kNumChar);
|
| +
|
| + for (size_t lo = 0; lo < test_str.size(); ++lo) {
|
| + for (size_t hi = lo + 1; hi <= test_str.size(); ++hi) {
|
| + ustring query(test_ustr.begin() + lo, test_ustr.begin() + hi);
|
| + ASSERT_EQ(query.size(), hi - lo);
|
| + auto pos = SuffixLowerBound(suffix_array, test_ustr.begin(),
|
| + query.begin(), query.end());
|
| + EXPECT_TRUE(
|
| + std::equal(query.begin(), query.end(), test_ustr.begin() + *pos));
|
| + }
|
| + }
|
| + }
|
| +}
|
| +
|
| +} // namespace zucchini
|
|
|