| OLD | NEW |
| (Empty) |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "net/base/lookup_string_in_fixed_set.h" | |
| 6 | |
| 7 #include <string.h> | |
| 8 | |
| 9 #include <algorithm> | |
| 10 #include <limits> | |
| 11 #include <ostream> | |
| 12 #include <utility> | |
| 13 #include <vector> | |
| 14 | |
| 15 #include "base/base_paths.h" | |
| 16 #include "base/files/file_path.h" | |
| 17 #include "base/files/file_util.h" | |
| 18 #include "base/path_service.h" | |
| 19 #include "base/strings/string_util.h" | |
| 20 #include "base/strings/stringprintf.h" | |
| 21 #include "testing/gtest/include/gtest/gtest.h" | |
| 22 | |
| 23 namespace net { | |
| 24 namespace { | |
| 25 namespace test1 { | |
| 26 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc
.cc" | |
| 27 } | |
| 28 namespace test3 { | |
| 29 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc
.cc" | |
| 30 } | |
| 31 namespace test4 { | |
| 32 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc
.cc" | |
| 33 } | |
| 34 namespace test5 { | |
| 35 #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc
.cc" | |
| 36 } | |
| 37 namespace test6 { | |
| 38 #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc
.cc" | |
| 39 } | |
| 40 | |
| 41 struct Expectation { | |
| 42 const char* const key; | |
| 43 int value; | |
| 44 }; | |
| 45 | |
| 46 void PrintTo(const Expectation& expectation, std::ostream* os) { | |
| 47 *os << "{\"" << expectation.key << "\", " << expectation.value << "}"; | |
| 48 } | |
| 49 | |
| 50 class LookupStringInFixedSetTest : public testing::TestWithParam<Expectation> { | |
| 51 protected: | |
| 52 template <size_t N> | |
| 53 int LookupInGraph(const unsigned char(&graph)[N], const char* key) { | |
| 54 return LookupStringInFixedSet(graph, N, key, strlen(key)); | |
| 55 } | |
| 56 }; | |
| 57 | |
| 58 class Dafsa1Test : public LookupStringInFixedSetTest {}; | |
| 59 | |
| 60 TEST_P(Dafsa1Test, BasicTest) { | |
| 61 const Expectation& param = GetParam(); | |
| 62 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); | |
| 63 } | |
| 64 | |
| 65 const Expectation kBasicTestCases[] = { | |
| 66 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, | |
| 67 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, | |
| 68 }; | |
| 69 | |
| 70 // Helper function for EnumerateDafsaLanaguage. | |
| 71 void RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup& lookup, | |
| 72 std::vector<char>* sequence, | |
| 73 std::vector<std::string>* language) { | |
| 74 int result = lookup.GetResultForCurrentSequence(); | |
| 75 if (result != kDafsaNotFound) { | |
| 76 std::string line(sequence->begin(), sequence->end()); | |
| 77 line += base::StringPrintf(", %d", result); | |
| 78 language->emplace_back(std::move(line)); | |
| 79 } | |
| 80 // Try appending each char value. | |
| 81 for (char c = std::numeric_limits<char>::min();; ++c) { | |
| 82 FixedSetIncrementalLookup continued_lookup = lookup; | |
| 83 if (continued_lookup.Advance(c)) { | |
| 84 sequence->push_back(c); | |
| 85 size_t saved_language_size = language->size(); | |
| 86 RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language); | |
| 87 CHECK_LT(saved_language_size, language->size()) | |
| 88 << "DAFSA includes a branch to nowhere at node: " | |
| 89 << std::string(sequence->begin(), sequence->end()); | |
| 90 sequence->pop_back(); | |
| 91 } | |
| 92 if (c == std::numeric_limits<char>::max()) | |
| 93 break; | |
| 94 } | |
| 95 } | |
| 96 | |
| 97 // Uses FixedSetIncrementalLookup to build a vector of every string in the | |
| 98 // language of the DAFSA. | |
| 99 template <typename Graph> | |
| 100 std::vector<std::string> EnumerateDafsaLanguage(const Graph& graph) { | |
| 101 FixedSetIncrementalLookup query(graph, sizeof(Graph)); | |
| 102 std::vector<char> sequence; | |
| 103 std::vector<std::string> language; | |
| 104 RecursivelyEnumerateDafsaLanguage(query, &sequence, &language); | |
| 105 return language; | |
| 106 } | |
| 107 | |
| 108 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | |
| 109 Dafsa1Test, | |
| 110 ::testing::ValuesIn(kBasicTestCases)); | |
| 111 | |
| 112 class Dafsa3Test : public LookupStringInFixedSetTest {}; | |
| 113 | |
| 114 // This DAFSA is constructed so that labels begin and end with unique | |
| 115 // characters, which makes it impossible to merge labels. Each inner node | |
| 116 // is about 100 bytes and a one byte offset can at most add 64 bytes to | |
| 117 // previous offset. Thus the paths must go over two byte offsets. | |
| 118 TEST_P(Dafsa3Test, TestDafsaTwoByteOffsets) { | |
| 119 const Expectation& param = GetParam(); | |
| 120 EXPECT_EQ(param.value, LookupInGraph(test3::kDafsa, param.key)); | |
| 121 } | |
| 122 | |
| 123 const Expectation kTwoByteOffsetTestCases[] = { | |
| 124 {"0________________________________________________________________________" | |
| 125 "____________________________0", | |
| 126 0}, | |
| 127 {"7________________________________________________________________________" | |
| 128 "____________________________7", | |
| 129 4}, | |
| 130 {"a________________________________________________________________________" | |
| 131 "____________________________8", | |
| 132 -1}, | |
| 133 }; | |
| 134 | |
| 135 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | |
| 136 Dafsa3Test, | |
| 137 ::testing::ValuesIn(kTwoByteOffsetTestCases)); | |
| 138 | |
| 139 class Dafsa4Test : public LookupStringInFixedSetTest {}; | |
| 140 | |
| 141 // This DAFSA is constructed so that labels begin and end with unique | |
| 142 // characters, which makes it impossible to merge labels. The byte array | |
| 143 // has a size of ~54k. A two byte offset can add at most add 8k to the | |
| 144 // previous offset. Since we can skip only forward in memory, the nodes | |
| 145 // representing the return values must be located near the end of the byte | |
| 146 // array. The probability that we can reach from an arbitrary inner node to | |
| 147 // a return value without using a three byte offset is small (but not zero). | |
| 148 // The test is repeated with some different keys and with a reasonable | |
| 149 // probability at least one of the tested paths has go over a three byte | |
| 150 // offset. | |
| 151 TEST_P(Dafsa4Test, TestDafsaThreeByteOffsets) { | |
| 152 const Expectation& param = GetParam(); | |
| 153 EXPECT_EQ(param.value, LookupInGraph(test4::kDafsa, param.key)); | |
| 154 } | |
| 155 | |
| 156 const Expectation kThreeByteOffsetTestCases[] = { | |
| 157 {"Z6_______________________________________________________________________" | |
| 158 "_____________________________Z6", | |
| 159 0}, | |
| 160 {"Z7_______________________________________________________________________" | |
| 161 "_____________________________Z7", | |
| 162 4}, | |
| 163 {"Za_______________________________________________________________________" | |
| 164 "_____________________________Z8", | |
| 165 -1}, | |
| 166 }; | |
| 167 | |
| 168 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | |
| 169 Dafsa4Test, | |
| 170 ::testing::ValuesIn(kThreeByteOffsetTestCases)); | |
| 171 | |
| 172 class Dafsa5Test : public LookupStringInFixedSetTest {}; | |
| 173 | |
| 174 // This DAFSA is constructed from words with similar prefixes but distinct | |
| 175 // suffixes. The DAFSA will then form a trie with the implicit source node | |
| 176 // as root. | |
| 177 TEST_P(Dafsa5Test, TestDafsaJoinedPrefixes) { | |
| 178 const Expectation& param = GetParam(); | |
| 179 EXPECT_EQ(param.value, LookupInGraph(test5::kDafsa, param.key)); | |
| 180 } | |
| 181 | |
| 182 const Expectation kJoinedPrefixesTestCases[] = { | |
| 183 {"ai", 0}, {"bj", 4}, {"aak", 0}, {"bbl", 4}, | |
| 184 {"aaa", -1}, {"bbb", -1}, {"aaaam", 0}, {"bbbbn", 0}, | |
| 185 }; | |
| 186 | |
| 187 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | |
| 188 Dafsa5Test, | |
| 189 ::testing::ValuesIn(kJoinedPrefixesTestCases)); | |
| 190 | |
| 191 class Dafsa6Test : public LookupStringInFixedSetTest {}; | |
| 192 | |
| 193 // This DAFSA is constructed from words with similar suffixes but distinct | |
| 194 // prefixes. The DAFSA will then form a trie with the implicit sink node as | |
| 195 // root. | |
| 196 TEST_P(Dafsa6Test, TestDafsaJoinedSuffixes) { | |
| 197 const Expectation& param = GetParam(); | |
| 198 EXPECT_EQ(param.value, LookupInGraph(test6::kDafsa, param.key)); | |
| 199 } | |
| 200 | |
| 201 const Expectation kJoinedSuffixesTestCases[] = { | |
| 202 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, | |
| 203 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, | |
| 204 }; | |
| 205 | |
| 206 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | |
| 207 Dafsa6Test, | |
| 208 ::testing::ValuesIn(kJoinedSuffixesTestCases)); | |
| 209 | |
| 210 // Validates that the generated DAFSA contains exactly the same information as | |
| 211 // effective_tld_names_unittest1.gperf. | |
| 212 TEST(LookupStringInFixedSetTest, Dafsa1EnumerateLanguage) { | |
| 213 auto language = EnumerateDafsaLanguage(test1::kDafsa); | |
| 214 | |
| 215 // These are the lines of effective_tld_names_unittest1.gperf, in sorted | |
| 216 // order. | |
| 217 std::vector<std::string> expected_language = { | |
| 218 "ac.jp, 0", "b.c, 1", "bar.baz.com, 0", "bar.jp, 2", | |
| 219 "baz.bar.jp, 2", "c, 2", "jp, 0", "no, 0", | |
| 220 "pref.bar.jp, 1", "priv.no, 4", "private, 4", "xn--fiqs8s, 0", | |
| 221 }; | |
| 222 | |
| 223 EXPECT_EQ(expected_language, language); | |
| 224 } | |
| 225 | |
| 226 // Validates that the generated DAFSA contains exactly the same information as | |
| 227 // effective_tld_names_unittest5.gperf. | |
| 228 TEST(LookupStringInFixedSetTest, Dafsa5EnumerateLanguage) { | |
| 229 auto language = EnumerateDafsaLanguage(test5::kDafsa); | |
| 230 | |
| 231 std::vector<std::string> expected_language = { | |
| 232 "aaaam, 0", "aak, 0", "ai, 0", "bbbbn, 0", "bbl, 4", "bj, 4", | |
| 233 }; | |
| 234 | |
| 235 EXPECT_EQ(expected_language, language); | |
| 236 } | |
| 237 | |
| 238 // Validates that the generated DAFSA contains exactly the same information as | |
| 239 // effective_tld_names_unittest6.gperf. | |
| 240 TEST(LookupStringInFixedSetTest, Dafsa6EnumerateLanguage) { | |
| 241 auto language = EnumerateDafsaLanguage(test6::kDafsa); | |
| 242 | |
| 243 std::vector<std::string> expected_language = { | |
| 244 "ia, 0", "jb, 4", "kaa, 0", "lbb, 4", "maaaa, 0", "nbbbb, 0", | |
| 245 }; | |
| 246 | |
| 247 EXPECT_EQ(expected_language, language); | |
| 248 } | |
| 249 | |
| 250 } // namespace | |
| 251 } // namespace net | |
| OLD | NEW |