Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(516)

Side by Side Diff: net/base/lookup_string_in_fixed_set_unittest.cc

Issue 2641953009: [1 of 4] Support prefix queries against the effective_tld_names DAFSA (Closed)
Patch Set: Rebase Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/lookup_string_in_fixed_set.h" 5 #include "net/base/lookup_string_in_fixed_set.h"
6 6
7 #include <string.h> 7 #include <string.h>
8
9 #include <algorithm>
10 #include <limits>
8 #include <ostream> 11 #include <ostream>
12 #include <utility>
13 #include <vector>
9 14
15 #include "base/base_paths.h"
16 #include "base/files/file_path.h"
17 #include "base/files/file_util.h"
18 #include "base/path_service.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h"
10 #include "testing/gtest/include/gtest/gtest.h" 21 #include "testing/gtest/include/gtest/gtest.h"
11 22
12 namespace net { 23 namespace net {
13 namespace { 24 namespace {
25 namespace effective_tld_names {
26 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
27 }
14 namespace test1 { 28 namespace test1 {
15 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc" 29 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
16 } 30 }
17 namespace test3 { 31 namespace test3 {
18 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc" 32 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc"
19 } 33 }
20 namespace test4 { 34 namespace test4 {
21 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc" 35 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc"
22 } 36 }
23 namespace test5 { 37 namespace test5 {
(...skipping 25 matching lines...) Expand all
49 TEST_P(Dafsa1Test, BasicTest) { 63 TEST_P(Dafsa1Test, BasicTest) {
50 const Expectation& param = GetParam(); 64 const Expectation& param = GetParam();
51 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); 65 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key));
52 } 66 }
53 67
54 const Expectation kBasicTestCases[] = { 68 const Expectation kBasicTestCases[] = {
55 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, 69 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1},
56 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, 70 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4},
57 }; 71 };
58 72
73 // Helper function for EnumerateDafsaLanguage.
74 void RecursivelyEnumerateDafsaLanguage(
75 const FixedSetIncrementalLookup& lookup,
76 std::string* sequence,
77 std::vector<std::pair<std::string, int>>* language) {
78 int result = lookup.GetResultForCurrentSequence();
79 if (result != kDafsaNotFound) {
80 language->emplace_back(std::string(sequence->begin(), sequence->end()),
81 result);
82 }
83 // Try appending each char value.
84 for (char c = std::numeric_limits<char>::min();; ++c) {
85 FixedSetIncrementalLookup continued_lookup = lookup;
86 if (continued_lookup.Advance(c)) {
87 sequence->push_back(c);
88 size_t saved_language_size = language->size();
89 RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language);
90 CHECK_LT(saved_language_size, language->size())
91 << "DAFSA includes a branch to nowhere at node: " << *sequence;
92 sequence->pop_back();
93 }
94 if (c == std::numeric_limits<char>::max())
95 break;
96 }
97 }
98
99 // Uses FixedSetIncrementalLookup to build a vector of every string in the
100 // language of the DAFSA.
101 template <typename Graph>
102 std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage(
103 const Graph& graph) {
104 FixedSetIncrementalLookup query(graph, sizeof(Graph));
105 std::string sequence;
106 std::vector<std::pair<std::string, int>> language;
107 RecursivelyEnumerateDafsaLanguage(query, &sequence, &language);
108 return language;
109 }
110
59 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, 111 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
60 Dafsa1Test, 112 Dafsa1Test,
61 ::testing::ValuesIn(kBasicTestCases)); 113 ::testing::ValuesIn(kBasicTestCases));
62 114
63 class Dafsa3Test : public LookupStringInFixedSetTest {}; 115 class Dafsa3Test : public LookupStringInFixedSetTest {};
64 116
65 // This DAFSA is constructed so that labels begin and end with unique 117 // This DAFSA is constructed so that labels begin and end with unique
66 // characters, which makes it impossible to merge labels. Each inner node 118 // characters, which makes it impossible to merge labels. Each inner node
67 // is about 100 bytes and a one byte offset can at most add 64 bytes to 119 // is about 100 bytes and a one byte offset can at most add 64 bytes to
68 // previous offset. Thus the paths must go over two byte offsets. 120 // previous offset. Thus the paths must go over two byte offsets.
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
151 203
152 const Expectation kJoinedSuffixesTestCases[] = { 204 const Expectation kJoinedSuffixesTestCases[] = {
153 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, 205 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4},
154 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, 206 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0},
155 }; 207 };
156 208
157 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, 209 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
158 Dafsa6Test, 210 Dafsa6Test,
159 ::testing::ValuesIn(kJoinedSuffixesTestCases)); 211 ::testing::ValuesIn(kJoinedSuffixesTestCases));
160 212
213 // Validates that the generated DAFSA contains exactly the same information as
214 // effective_tld_names.gperf.
Ryan Sleevi 2017/01/25 19:11:04 So in general, I try to push back on these "Test a
ncarter (slow) 2017/01/26 23:29:11 Regarding coupling: these unittests are already wr
Ryan Sleevi 2017/01/27 00:08:24 Yeah, this used to be an implementation detail of
ncarter (slow) 2017/02/15 23:42:11 I've redone these unittests so that they just run
215 TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) {
216 // This test runs on the real effective TLD names file.
217 auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa);
218
219 // Generate a sorted list of every character sequence and result code that the
220 // DAFSA recognizes, mimicking the gperf format.
221 std::vector<std::string> regenerated_gperf;
222 for (const auto& rule : language) {
223 // Mimic the gperf format: "<input-string>, <result-code-int>\n"
224 regenerated_gperf.emplace_back(
225 base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second));
226 }
227 // Our .gperf files happen to be sorted, so mimic that.
228 std::sort(regenerated_gperf.begin(), regenerated_gperf.end());
229
230 std::string regenerated_gperf_text =
231 base::JoinString(regenerated_gperf, base::StringPiece());
232
233 // Second, read the source .gperf file into memory (this is the file from
234 // which effective_tld_names::kDafsa was generated).
235 base::FilePath gperf_file_path;
236 PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path);
237 gperf_file_path = gperf_file_path.AppendASCII("net")
238 .AppendASCII("base")
239 .AppendASCII("registry_controlled_domains")
240 .AppendASCII("effective_tld_names.gperf");
241 std::string actual_gperf_text;
242 EXPECT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text));
243 std::string key = "%%\n";
244 actual_gperf_text.erase(0, actual_gperf_text.find(key) + key.length());
245 actual_gperf_text.erase(actual_gperf_text.rfind(key));
246
247 // |regenerated_gperf_text| should match the body of the .gperf file exactly.
248 EXPECT_EQ(actual_gperf_text, regenerated_gperf_text);
249
250 // Sanity check to prevent trivial success.
251 EXPECT_GT(actual_gperf_text.length(), 30000U);
252 }
253
161 } // namespace 254 } // namespace
162 } // namespace net 255 } // namespace net
OLDNEW
« net/base/lookup_string_in_fixed_set.cc ('K') | « net/base/lookup_string_in_fixed_set.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698