Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Unified Diff: net/base/lookup_string_in_fixed_set_unittest.cc

Issue 2641953009: [1 of 4] Support prefix queries against the effective_tld_names DAFSA (Closed)
Patch Set: Rebase Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: net/base/lookup_string_in_fixed_set_unittest.cc
diff --git a/net/base/lookup_string_in_fixed_set_unittest.cc b/net/base/lookup_string_in_fixed_set_unittest.cc
index 82e6c22d84be20d215c7667ea39ee9127d8f37ff..a97b031141d929fb5148ef7ca580ff119c14b3c3 100644
--- a/net/base/lookup_string_in_fixed_set_unittest.cc
+++ b/net/base/lookup_string_in_fixed_set_unittest.cc
@@ -5,12 +5,26 @@
#include "net/base/lookup_string_in_fixed_set.h"
#include <string.h>
-#include <ostream>
+#include <algorithm>
+#include <limits>
+#include <ostream>
+#include <utility>
+#include <vector>
+
+#include "base/base_paths.h"
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/path_service.h"
+#include "base/strings/string_util.h"
+#include "base/strings/stringprintf.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace net {
namespace {
+namespace effective_tld_names {
+#include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
+}
namespace test1 {
#include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
}
@@ -56,6 +70,44 @@ const Expectation kBasicTestCases[] = {
{"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4},
};
+// Helper function for EnumerateDafsaLanguage. Recursively walks the DAFSA
+// starting from the state held by |lookup|. |sequence| holds the characters
+// consumed so far; every sequence whose result is not kDafsaNotFound is
+// appended to |language| together with that result.
+void RecursivelyEnumerateDafsaLanguage(
+    const FixedSetIncrementalLookup& lookup,
+    std::string* sequence,
+    std::vector<std::pair<std::string, int>>* language) {
+  const int current_result = lookup.GetResultForCurrentSequence();
+  if (current_result != kDafsaNotFound)
+    language->emplace_back(*sequence, current_result);
+
+  // Probe every possible char value as the next input character. The counter
+  // is an int so the loop can cover the full char range without overflowing.
+  const int first_value = std::numeric_limits<char>::min();
+  const int last_value = std::numeric_limits<char>::max();
+  for (int value = first_value; value <= last_value; ++value) {
+    const char next_char = static_cast<char>(value);
+    // Advance a copy so that |lookup| stays usable for the next candidate.
+    FixedSetIncrementalLookup branch = lookup;
+    if (!branch.Advance(next_char))
+      continue;
+
+    sequence->push_back(next_char);
+    const size_t size_before_descent = language->size();
+    RecursivelyEnumerateDafsaLanguage(branch, sequence, language);
+    // A successful Advance() must lead to at least one recognized string.
+    CHECK_LT(size_before_descent, language->size())
+        << "DAFSA includes a branch to nowhere at node: " << *sequence;
+    sequence->pop_back();
+  }
+}
+
+// Returns every (string, result code) pair in the language of the DAFSA
+// |graph|, gathered by walking it with FixedSetIncrementalLookup.
+template <typename Graph>
+std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage(
+    const Graph& graph) {
+  std::vector<std::pair<std::string, int>> recognized;
+  std::string prefix;
+  // Start the walk from a freshly constructed lookup over the whole graph.
+  RecursivelyEnumerateDafsaLanguage(
+      FixedSetIncrementalLookup(graph, sizeof(Graph)), &prefix, &recognized);
+  return recognized;
+}
+
INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
Dafsa1Test,
::testing::ValuesIn(kBasicTestCases));
@@ -158,5 +210,46 @@ INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
Dafsa6Test,
::testing::ValuesIn(kJoinedSuffixesTestCases));
+// Validates that the generated DAFSA contains exactly the same information as
+// effective_tld_names.gperf.
Ryan Sleevi 2017/01/25 19:11:04 So in general, I try to push back on these "Test a
ncarter (slow) 2017/01/26 23:29:11 Regarding coupling: these unittests are already wr
Ryan Sleevi 2017/01/27 00:08:24 Yeah, this used to be an implementation detail of
ncarter (slow) 2017/02/15 23:42:11 I've redone these unittests so that they just run
+TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) {
+  // Compares the language recognized by the generated DAFSA against the body
+  // of the .gperf source file. Runs on the real effective TLD names file.
+  auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa);
+
+  // First, generate a sorted list of every character sequence and result code
+  // that the DAFSA recognizes, mimicking the gperf format.
+  std::vector<std::string> regenerated_gperf;
+  for (const auto& rule : language) {
+    // Mimic the gperf format: "<input-string>, <result-code-int>\n"
+    regenerated_gperf.emplace_back(
+        base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second));
+  }
+  // Our .gperf files happen to be sorted, so mimic that.
+  std::sort(regenerated_gperf.begin(), regenerated_gperf.end());
+
+  std::string regenerated_gperf_text =
+      base::JoinString(regenerated_gperf, base::StringPiece());
+
+  // Second, read the source .gperf file into memory (this is the file from
+  // which effective_tld_names::kDafsa was generated). Failures here are fatal:
+  // without the file contents the comparison below is meaningless.
+  base::FilePath gperf_file_path;
+  ASSERT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path));
+  gperf_file_path = gperf_file_path.AppendASCII("net")
+                        .AppendASCII("base")
+                        .AppendASCII("registry_controlled_domains")
+                        .AppendASCII("effective_tld_names.gperf");
+  std::string actual_gperf_text;
+  ASSERT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text));
+
+  // Keep only the text between the first and the last "%%\n" delimiter. Both
+  // positions are verified before erasing: a missing delimiter would
+  // otherwise make the offset arithmetic wrap around or pass an out-of-range
+  // position to erase().
+  const std::string key = "%%\n";
+  const size_t first_key_pos = actual_gperf_text.find(key);
+  ASSERT_NE(std::string::npos, first_key_pos);
+  actual_gperf_text.erase(0, first_key_pos + key.length());
+  const size_t last_key_pos = actual_gperf_text.rfind(key);
+  ASSERT_NE(std::string::npos, last_key_pos);
+  actual_gperf_text.erase(last_key_pos);
+
+  // |regenerated_gperf_text| should match the body of the .gperf file exactly.
+  EXPECT_EQ(actual_gperf_text, regenerated_gperf_text);
+
+  // Sanity check to prevent trivial success.
+  EXPECT_GT(actual_gperf_text.length(), 30000U);
+}
+
} // namespace
} // namespace net
« net/base/lookup_string_in_fixed_set.cc ('K') | « net/base/lookup_string_in_fixed_set.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698