OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/lookup_string_in_fixed_set.h" | 5 #include "net/base/lookup_string_in_fixed_set.h" |
6 | 6 |
7 #include <string.h> | 7 #include <string.h> |
| 8 |
| 9 #include <algorithm> |
| 10 #include <limits> |
8 #include <ostream> | 11 #include <ostream> |
| 12 #include <utility> |
| 13 #include <vector> |
9 | 14 |
| 15 #include "base/base_paths.h" |
| 16 #include "base/files/file_path.h" |
| 17 #include "base/files/file_util.h" |
| 18 #include "base/path_service.h" |
| 19 #include "base/strings/string_util.h" |
| 20 #include "base/strings/stringprintf.h" |
10 #include "testing/gtest/include/gtest/gtest.h" | 21 #include "testing/gtest/include/gtest/gtest.h" |
11 | 22 |
12 namespace net { | 23 namespace net { |
13 namespace { | 24 namespace { |
| 25 namespace effective_tld_names { |
| 26 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" |
| 27 } |
14 namespace test1 { | 28 namespace test1 { |
15 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc
.cc" | 29 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc
.cc" |
16 } | 30 } |
17 namespace test3 { | 31 namespace test3 { |
18 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc
.cc" | 32 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc
.cc" |
19 } | 33 } |
20 namespace test4 { | 34 namespace test4 { |
21 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc
.cc" | 35 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc
.cc" |
22 } | 36 } |
23 namespace test5 { | 37 namespace test5 { |
(...skipping 25 matching lines...) Expand all Loading... |
49 TEST_P(Dafsa1Test, BasicTest) { | 63 TEST_P(Dafsa1Test, BasicTest) { |
50 const Expectation& param = GetParam(); | 64 const Expectation& param = GetParam(); |
51 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); | 65 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); |
52 } | 66 } |
53 | 67 |
54 const Expectation kBasicTestCases[] = { | 68 const Expectation kBasicTestCases[] = { |
55 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, | 69 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, |
56 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, | 70 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, |
57 }; | 71 }; |
58 | 72 |
| 73 // Helper function for EnumerateDafsaLanaguage. |
| 74 void RecursivelyEnumerateDafsaLanguage( |
| 75 const FixedSetIncrementalLookup& lookup, |
| 76 std::vector<char>* sequence, |
| 77 std::vector<std::pair<std::string, int>>* language) { |
| 78 int result = lookup.GetResultForCurrentSequence(); |
| 79 if (result != kDafsaNotFound) { |
| 80 language->emplace_back(std::string(sequence->begin(), sequence->end()), |
| 81 result); |
| 82 } |
| 83 // Try appending each char value. |
| 84 for (char c = std::numeric_limits<char>::min();; ++c) { |
| 85 FixedSetIncrementalLookup continued_lookup = lookup; |
| 86 if (continued_lookup.Advance(c)) { |
| 87 sequence->push_back(c); |
| 88 size_t saved_language_size = language->size(); |
| 89 RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language); |
| 90 CHECK_LT(saved_language_size, language->size()) |
| 91 << "DAFSA includes a branch to nowhere at node: " |
| 92 << std::string(sequence->begin(), sequence->end()); |
| 93 sequence->pop_back(); |
| 94 } |
| 95 if (c == std::numeric_limits<char>::max()) |
| 96 break; |
| 97 } |
| 98 } |
| 99 |
// Walks |graph| with FixedSetIncrementalLookup and returns every string in the
// language of the DAFSA, each paired with its result code.
template <typename Graph>
std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage(
    const Graph& graph) {
  std::vector<std::pair<std::string, int>> accepted;
  std::vector<char> prefix;
  RecursivelyEnumerateDafsaLanguage(
      FixedSetIncrementalLookup(graph, sizeof(Graph)), &prefix, &accepted);
  return accepted;
}
| 111 |
59 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | 112 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
60 Dafsa1Test, | 113 Dafsa1Test, |
61 ::testing::ValuesIn(kBasicTestCases)); | 114 ::testing::ValuesIn(kBasicTestCases)); |
62 | 115 |
63 class Dafsa3Test : public LookupStringInFixedSetTest {}; | 116 class Dafsa3Test : public LookupStringInFixedSetTest {}; |
64 | 117 |
65 // This DAFSA is constructed so that labels begin and end with unique | 118 // This DAFSA is constructed so that labels begin and end with unique |
66 // characters, which makes it impossible to merge labels. Each inner node | 119 // characters, which makes it impossible to merge labels. Each inner node |
67 // is about 100 bytes and a one byte offset can at most add 64 bytes to | 120 // is about 100 bytes and a one byte offset can at most add 64 bytes to |
68 // previous offset. Thus the paths must go over two byte offsets. | 121 // previous offset. Thus the paths must go over two byte offsets. |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
151 | 204 |
152 const Expectation kJoinedSuffixesTestCases[] = { | 205 const Expectation kJoinedSuffixesTestCases[] = { |
153 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, | 206 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, |
154 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, | 207 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, |
155 }; | 208 }; |
156 | 209 |
157 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | 210 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
158 Dafsa6Test, | 211 Dafsa6Test, |
159 ::testing::ValuesIn(kJoinedSuffixesTestCases)); | 212 ::testing::ValuesIn(kJoinedSuffixesTestCases)); |
160 | 213 |
| 214 // Validates that the generated DAFSA contains exactly the same information as |
| 215 // effective_tld_names.gperf. |
| 216 TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) { |
| 217 // This test runs on the real effective TLD names file. |
| 218 auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa); |
| 219 |
| 220 // Generate a sorted list of every character sequence and result code that the |
| 221 // DAFSA recognizes, mimicing the gperf format. |
| 222 std::vector<std::string> regenerated_gperf; |
| 223 for (const auto& rule : language) { |
| 224 // Mimic the gperf format: "<input-string>, <result-code-int>\n" |
| 225 regenerated_gperf.emplace_back( |
| 226 base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second)); |
| 227 } |
| 228 // Our .gperf files happen to be sorted, so mimic that. |
| 229 std::sort(regenerated_gperf.begin(), regenerated_gperf.end()); |
| 230 |
| 231 std::string regenerated_gperf_text = |
| 232 base::JoinString(regenerated_gperf, base::StringPiece()); |
| 233 |
| 234 // Second, read the source .gperf file into memory (this is the file from |
| 235 // which effective_tld_names::kDafsa was generated). |
| 236 base::FilePath gperf_file_path; |
| 237 PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path); |
| 238 gperf_file_path = gperf_file_path.AppendASCII("net") |
| 239 .AppendASCII("base") |
| 240 .AppendASCII("registry_controlled_domains") |
| 241 .AppendASCII("effective_tld_names.gperf"); |
| 242 std::string actual_gperf_text; |
| 243 EXPECT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text)); |
| 244 std::string key = "%%\n"; |
| 245 actual_gperf_text.erase(0, actual_gperf_text.find(key) + key.length()); |
| 246 actual_gperf_text.erase(actual_gperf_text.rfind(key)); |
| 247 |
| 248 // |regenerated_gperf_text| should match the body of the .gperf file exactly. |
| 249 EXPECT_EQ(actual_gperf_text, regenerated_gperf_text); |
| 250 |
| 251 // Sanity check to prevent trivial success. |
| 252 EXPECT_GT(actual_gperf_text.length(), 30000U); |
| 253 } |
| 254 |
161 } // namespace | 255 } // namespace |
162 } // namespace net | 256 } // namespace net |
OLD | NEW |