OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/lookup_string_in_fixed_set.h" | 5 #include "net/base/lookup_string_in_fixed_set.h" |
6 | 6 |
7 #include <string.h> | 7 #include <string.h> |
8 | |
9 #include <algorithm> | |
10 #include <limits> | |
8 #include <ostream> | 11 #include <ostream> |
12 #include <utility> | |
13 #include <vector> | |
9 | 14 |
15 #include "base/base_paths.h" | |
16 #include "base/files/file_path.h" | |
17 #include "base/files/file_util.h" | |
18 #include "base/path_service.h" | |
19 #include "base/strings/string_util.h" | |
20 #include "base/strings/stringprintf.h" | |
10 #include "testing/gtest/include/gtest/gtest.h" | 21 #include "testing/gtest/include/gtest/gtest.h" |
11 | 22 |
12 namespace net { | 23 namespace net { |
13 namespace { | 24 namespace { |
25 namespace effective_tld_names { | |
26 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" | |
27 } | |
14 namespace test1 { | 28 namespace test1 { |
15 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc .cc" | 29 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc .cc" |
16 } | 30 } |
17 namespace test3 { | 31 namespace test3 { |
18 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc .cc" | 32 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc .cc" |
19 } | 33 } |
20 namespace test4 { | 34 namespace test4 { |
21 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc .cc" | 35 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc .cc" |
22 } | 36 } |
23 namespace test5 { | 37 namespace test5 { |
(...skipping 25 matching lines...) Expand all Loading... | |
49 TEST_P(Dafsa1Test, BasicTest) { | 63 TEST_P(Dafsa1Test, BasicTest) { |
50 const Expectation& param = GetParam(); | 64 const Expectation& param = GetParam(); |
51 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); | 65 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); |
52 } | 66 } |
53 | 67 |
54 const Expectation kBasicTestCases[] = { | 68 const Expectation kBasicTestCases[] = { |
55 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, | 69 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, |
56 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, | 70 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, |
57 }; | 71 }; |
58 | 72 |
73 // Helper function for EnumerateDafsaLanaguage. | |
74 void RecursivelyEnumerateDafsaLanguage( | |
75 const FixedSetIncrementalLookup& lookup, | |
76 std::string* sequence, | |
77 std::vector<std::pair<std::string, int>>* language) { | |
78 int result = lookup.GetResultForCurrentSequence(); | |
79 if (result != kDafsaNotFound) { | |
80 language->emplace_back(std::string(sequence->begin(), sequence->end()), | |
81 result); | |
82 } | |
83 // Try appending each char value. | |
84 for (char c = std::numeric_limits<char>::min();; ++c) { | |
85 FixedSetIncrementalLookup continued_lookup = lookup; | |
86 if (continued_lookup.Advance(c)) { | |
87 sequence->push_back(c); | |
88 size_t saved_language_size = language->size(); | |
89 RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language); | |
90 CHECK_LT(saved_language_size, language->size()) | |
91 << "DAFSA includes a branch to nowhere at node: " << *sequence; | |
92 sequence->pop_back(); | |
93 } | |
94 if (c == std::numeric_limits<char>::max()) | |
95 break; | |
96 } | |
97 } | |
98 | |
// Builds a vector holding every string in the language of the DAFSA |graph|,
// paired with its result code, by driving a FixedSetIncrementalLookup through
// RecursivelyEnumerateDafsaLanguage starting from the empty sequence.
//
// NOTE(review): sizeof(Graph) is passed as the second constructor argument,
// presumably the graph's length in bytes, so |graph| should be an array (not a
// pointer) -- confirm against FixedSetIncrementalLookup's declaration.
template <typename Graph>
std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage(
    const Graph& graph) {
  std::vector<std::pair<std::string, int>> accepted;
  std::string prefix;
  RecursivelyEnumerateDafsaLanguage(
      FixedSetIncrementalLookup(graph, sizeof(Graph)), &prefix, &accepted);
  return accepted;
}
110 | |
59 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | 111 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
60 Dafsa1Test, | 112 Dafsa1Test, |
61 ::testing::ValuesIn(kBasicTestCases)); | 113 ::testing::ValuesIn(kBasicTestCases)); |
62 | 114 |
63 class Dafsa3Test : public LookupStringInFixedSetTest {}; | 115 class Dafsa3Test : public LookupStringInFixedSetTest {}; |
64 | 116 |
65 // This DAFSA is constructed so that labels begin and end with unique | 117 // This DAFSA is constructed so that labels begin and end with unique |
66 // characters, which makes it impossible to merge labels. Each inner node | 118 // characters, which makes it impossible to merge labels. Each inner node |
67 // is about 100 bytes and a one byte offset can at most add 64 bytes to | 119 // is about 100 bytes and a one byte offset can at most add 64 bytes to |
68 // previous offset. Thus the paths must go over two byte offsets. | 120 // previous offset. Thus the paths must go over two byte offsets. |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
151 | 203 |
152 const Expectation kJoinedSuffixesTestCases[] = { | 204 const Expectation kJoinedSuffixesTestCases[] = { |
153 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, | 205 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, |
154 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, | 206 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, |
155 }; | 207 }; |
156 | 208 |
157 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, | 209 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
158 Dafsa6Test, | 210 Dafsa6Test, |
159 ::testing::ValuesIn(kJoinedSuffixesTestCases)); | 211 ::testing::ValuesIn(kJoinedSuffixesTestCases)); |
160 | 212 |
213 // Validates that the generated DAFSA contains exactly the same information as | |
214 // effective_tld_names.gperf. | |
Ryan Sleevi
2017/01/25 19:11:04
So in general, I try to push back on these "Test a
ncarter (slow)
2017/01/26 23:29:11
Regarding coupling: these unittests are already wr
Ryan Sleevi
2017/01/27 00:08:24
Yeah, this used to be an implementation detail of
ncarter (slow)
2017/02/15 23:42:11
I've redone these unittests so that they just run
| |
215 TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) { | |
216 // This test runs on the real effective TLD names file. | |
217 auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa); | |
218 | |
219 // Generate a sorted list of every character sequence and result code that the | |
220 // DAFSA recognizes, mimicing the gperf format. | |
221 std::vector<std::string> regenerated_gperf; | |
222 for (const auto& rule : language) { | |
223 // Mimic the gperf format: "<input-string>, <result-code-int>\n" | |
224 regenerated_gperf.emplace_back( | |
225 base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second)); | |
226 } | |
227 // Our .gperf files happen to be sorted, so mimic that. | |
228 std::sort(regenerated_gperf.begin(), regenerated_gperf.end()); | |
229 | |
230 std::string regenerated_gperf_text = | |
231 base::JoinString(regenerated_gperf, base::StringPiece()); | |
232 | |
233 // Second, read the source .gperf file into memory (this is the file from | |
234 // which effective_tld_names::kDafsa was generated). | |
235 base::FilePath gperf_file_path; | |
236 PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path); | |
237 gperf_file_path = gperf_file_path.AppendASCII("net") | |
238 .AppendASCII("base") | |
239 .AppendASCII("registry_controlled_domains") | |
240 .AppendASCII("effective_tld_names.gperf"); | |
241 std::string actual_gperf_text; | |
242 EXPECT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text)); | |
243 std::string key = "%%\n"; | |
244 actual_gperf_text.erase(0, actual_gperf_text.find(key) + key.length()); | |
245 actual_gperf_text.erase(actual_gperf_text.rfind(key)); | |
246 | |
247 // |regenerated_gperf_text| should match the body of the .gperf file exactly. | |
248 EXPECT_EQ(actual_gperf_text, regenerated_gperf_text); | |
249 | |
250 // Sanity check to prevent trivial success. | |
251 EXPECT_GT(actual_gperf_text.length(), 30000U); | |
252 } | |
253 | |
161 } // namespace | 254 } // namespace |
162 } // namespace net | 255 } // namespace net |
OLD | NEW |