Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: net/base/lookup_string_in_fixed_set_unittest.cc

Issue 2641953009: [1 of 4] Support prefix queries against the effective_tld_names DAFSA (Closed)
Patch Set: Fixes. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/lookup_string_in_fixed_set.h" 5 #include "net/base/lookup_string_in_fixed_set.h"
6 6
7 #include <string.h> 7 #include <string.h>
8
9 #include <algorithm>
10 #include <limits>
8 #include <ostream> 11 #include <ostream>
12 #include <utility>
13 #include <vector>
9 14
15 #include "base/base_paths.h"
16 #include "base/files/file_path.h"
17 #include "base/files/file_util.h"
18 #include "base/path_service.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h"
10 #include "testing/gtest/include/gtest/gtest.h" 21 #include "testing/gtest/include/gtest/gtest.h"
11 22
12 namespace net { 23 namespace net {
13 namespace { 24 namespace {
25 namespace effective_tld_names {
26 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
27 }
14 namespace test1 { 28 namespace test1 {
15 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc .cc" 29 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc .cc"
16 } 30 }
17 namespace test3 { 31 namespace test3 {
18 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc .cc" 32 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc .cc"
19 } 33 }
20 namespace test4 { 34 namespace test4 {
21 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc .cc" 35 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc .cc"
22 } 36 }
23 namespace test5 { 37 namespace test5 {
(...skipping 25 matching lines...) Expand all
49 TEST_P(Dafsa1Test, BasicTest) { 63 TEST_P(Dafsa1Test, BasicTest) {
50 const Expectation& param = GetParam(); 64 const Expectation& param = GetParam();
51 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); 65 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key));
52 } 66 }
53 67
54 const Expectation kBasicTestCases[] = { 68 const Expectation kBasicTestCases[] = {
55 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, 69 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1},
56 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, 70 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4},
57 }; 71 };
58 72
73 // Helper function for EnumerateDafsaLanaguage.
74 void RecursivelyEnumerateDafsaLanguage(
75 const FixedSetIncrementalLookup& lookup,
76 std::vector<char>* sequence,
77 std::vector<std::pair<std::string, int>>* language) {
78 int result = lookup.GetResultForCurrentSequence();
79 if (result != kDafsaNotFound) {
80 language->emplace_back(std::string(sequence->begin(), sequence->end()),
81 result);
82 }
83 // Try appending each char value.
84 for (char c = std::numeric_limits<char>::min();; ++c) {
85 FixedSetIncrementalLookup continued_lookup = lookup;
86 if (continued_lookup.Advance(c)) {
87 sequence->push_back(c);
88 size_t saved_language_size = language->size();
89 RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language);
90 CHECK_LT(saved_language_size, language->size())
91 << "DAFSA includes a branch to nowhere at node: "
92 << std::string(sequence->begin(), sequence->end());
93 sequence->pop_back();
94 }
95 if (c == std::numeric_limits<char>::max())
96 break;
97 }
98 }
99
// Builds, via FixedSetIncrementalLookup, a vector holding every character
// sequence in the language of the DAFSA |graph|, each paired with the result
// value the lookup reports for it. The lookup is constructed over |graph| with
// sizeof(Graph) as its length argument.
template <typename Graph>
std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage(
    const Graph& graph) {
  std::vector<std::pair<std::string, int>> recognized;
  std::vector<char> prefix;
  // Start from a fresh cursor with no characters consumed.
  RecursivelyEnumerateDafsaLanguage(
      FixedSetIncrementalLookup(graph, sizeof(Graph)), &prefix, &recognized);
  return recognized;
}
111
59 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, 112 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
60 Dafsa1Test, 113 Dafsa1Test,
61 ::testing::ValuesIn(kBasicTestCases)); 114 ::testing::ValuesIn(kBasicTestCases));
62 115
63 class Dafsa3Test : public LookupStringInFixedSetTest {}; 116 class Dafsa3Test : public LookupStringInFixedSetTest {};
64 117
65 // This DAFSA is constructed so that labels begin and end with unique 118 // This DAFSA is constructed so that labels begin and end with unique
66 // characters, which makes it impossible to merge labels. Each inner node 119 // characters, which makes it impossible to merge labels. Each inner node
67 // is about 100 bytes and a one byte offset can at most add 64 bytes to 120 // is about 100 bytes and a one byte offset can at most add 64 bytes to
68 // previous offset. Thus the paths must go over two byte offsets. 121 // previous offset. Thus the paths must go over two byte offsets.
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
151 204
152 const Expectation kJoinedSuffixesTestCases[] = { 205 const Expectation kJoinedSuffixesTestCases[] = {
153 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, 206 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4},
154 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, 207 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0},
155 }; 208 };
156 209
157 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, 210 INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
158 Dafsa6Test, 211 Dafsa6Test,
159 ::testing::ValuesIn(kJoinedSuffixesTestCases)); 212 ::testing::ValuesIn(kJoinedSuffixesTestCases));
160 213
214 // Validates that the generated DAFSA contains exactly the same information as
215 // effective_tld_names.gperf.
216 TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) {
217 // This test runs on the real effective TLD names file.
218 auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa);
219
220 // Generate a sorted list of every character sequence and result code that the
221 // DAFSA recognizes, mimicing the gperf format.
222 std::vector<std::string> regenerated_gperf;
223 for (const auto& rule : language) {
224 // Mimic the gperf format: "<input-string>, <result-code-int>\n"
225 regenerated_gperf.emplace_back(
226 base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second));
227 }
228 // Our .gperf files happen to be sorted, so mimic that.
229 std::sort(regenerated_gperf.begin(), regenerated_gperf.end());
230
231 std::string regenerated_gperf_text =
232 base::JoinString(regenerated_gperf, base::StringPiece());
233
234 // Second, read the source .gperf file into memory (this is the file from
235 // which effective_tld_names::kDafsa was generated).
236 base::FilePath gperf_file_path;
237 PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path);
238 gperf_file_path = gperf_file_path.AppendASCII("net")
239 .AppendASCII("base")
240 .AppendASCII("registry_controlled_domains")
241 .AppendASCII("effective_tld_names.gperf");
242 std::string actual_gperf_text;
243 EXPECT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text));
244 std::string key = "%%\n";
245 actual_gperf_text.erase(0, actual_gperf_text.find(key) + key.length());
246 actual_gperf_text.erase(actual_gperf_text.rfind(key));
247
248 // |regenerated_gperf_text| should match the body of the .gperf file exactly.
249 EXPECT_EQ(actual_gperf_text, regenerated_gperf_text);
250
251 // Sanity check to prevent trivial success.
252 EXPECT_GT(actual_gperf_text.length(), 30000U);
253 }
254
161 } // namespace 255 } // namespace
162 } // namespace net 256 } // namespace net
OLDNEW
« net/base/lookup_string_in_fixed_set.h ('K') | « net/base/lookup_string_in_fixed_set.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698