Index: net/base/lookup_string_in_fixed_set_unittest.cc |
diff --git a/net/base/lookup_string_in_fixed_set_unittest.cc b/net/base/lookup_string_in_fixed_set_unittest.cc |
index 82e6c22d84be20d215c7667ea39ee9127d8f37ff..7a6f76b860fdf403c5ac74a3e3b85f6f2eb5afdb 100644 |
--- a/net/base/lookup_string_in_fixed_set_unittest.cc |
+++ b/net/base/lookup_string_in_fixed_set_unittest.cc |
@@ -5,12 +5,26 @@ |
#include "net/base/lookup_string_in_fixed_set.h" |
#include <string.h> |
+ |
+#include <algorithm> |
+#include <limits> |
#include <ostream> |
+#include <utility> |
+#include <vector> |
+#include "base/base_paths.h" |
+#include "base/files/file_path.h" |
+#include "base/files/file_util.h" |
+#include "base/path_service.h" |
+#include "base/strings/string_util.h" |
+#include "base/strings/stringprintf.h" |
#include "testing/gtest/include/gtest/gtest.h" |
namespace net { |
namespace { |
+// Wrap the generated table for the real effective TLD names list in its own |
+// namespace; the test below refers to it as effective_tld_names::kDafsa. |
+namespace effective_tld_names { |
+#include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" |
+} |
namespace test1 { |
#include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc" |
} |
@@ -56,6 +70,45 @@ const Expectation kBasicTestCases[] = { |
{"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, |
}; |
+// Recursive worker behind EnumerateDafsaLanguage. |lookup| is positioned after |
+// the characters currently held in |sequence|. Every accepted string that can |
+// be reached from there is appended to |language| with its result code. |
+void RecursivelyEnumerateDafsaLanguage( |
+    const FixedSetIncrementalLookup& lookup, |
+    std::vector<char>* sequence, |
+    std::vector<std::pair<std::string, int>>* language) { |
+  // Record the current prefix if it is itself a member of the language. |
+  const int prefix_result = lookup.GetResultForCurrentSequence(); |
+  if (prefix_result != kDafsaNotFound) { |
+    language->emplace_back(std::string(sequence->begin(), sequence->end()), |
+                           prefix_result); |
+  } |
+  // Probe every possible char value as the next character. The counter is an |
+  // int so the loop can cover the whole char range without wrapping around. |
+  const int first_code = std::numeric_limits<char>::min(); |
+  const int last_code = std::numeric_limits<char>::max(); |
+  for (int code = first_code; code <= last_code; ++code) { |
+    const char next = static_cast<char>(code); |
+    // Advance a copy so that |lookup| stays at the current prefix. |
+    FixedSetIncrementalLookup child = lookup; |
+    if (!child.Advance(next)) |
+      continue; |
+    sequence->push_back(next); |
+    const size_t size_before = language->size(); |
+    RecursivelyEnumerateDafsaLanguage(child, sequence, language); |
+    // A branch that was accepted must lead to at least one complete string. |
+    CHECK_LT(size_before, language->size()) |
+        << "DAFSA includes a branch to nowhere at node: " |
+        << std::string(sequence->begin(), sequence->end()); |
+    sequence->pop_back(); |
+  } |
+} |
+ |
+// Returns every (string, result code) pair accepted by the DAFSA |graph|, |
+// collected by walking it with FixedSetIncrementalLookup. |
+template <typename Graph> |
+std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage( |
+    const Graph& graph) { |
+  std::vector<std::pair<std::string, int>> accepted; |
+  std::vector<char> prefix; |
+  // Start from the root of the graph with an empty prefix. |
+  FixedSetIncrementalLookup root(graph, sizeof(Graph)); |
+  RecursivelyEnumerateDafsaLanguage(root, &prefix, &accepted); |
+  return accepted; |
+} |
+ |
INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
Dafsa1Test, |
::testing::ValuesIn(kBasicTestCases)); |
@@ -158,5 +211,46 @@ INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
Dafsa6Test, |
::testing::ValuesIn(kJoinedSuffixesTestCases)); |
+// Validates that the generated DAFSA contains exactly the same information as |
+// effective_tld_names.gperf. |
+TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) { |
+  // This test runs on the real effective TLD names file. |
+  auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa); |
+ |
+  // First, generate a sorted list of every character sequence and result code |
+  // that the DAFSA recognizes, mimicking the gperf format. |
+  std::vector<std::string> regenerated_gperf; |
+  for (const auto& rule : language) { |
+    // Mimic the gperf format: "<input-string>, <result-code-int>\n" |
+    regenerated_gperf.emplace_back( |
+        base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second)); |
+  } |
+  // Our .gperf files happen to be sorted, so mimic that. |
+  std::sort(regenerated_gperf.begin(), regenerated_gperf.end()); |
+ |
+  std::string regenerated_gperf_text = |
+      base::JoinString(regenerated_gperf, base::StringPiece()); |
+ |
+  // Second, read the source .gperf file into memory (this is the file from |
+  // which effective_tld_names::kDafsa was generated). Use ASSERT_* so that the |
+  // test stops here if the file cannot be located or read. |
+  base::FilePath gperf_file_path; |
+  ASSERT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path)); |
+  gperf_file_path = gperf_file_path.AppendASCII("net") |
+                        .AppendASCII("base") |
+                        .AppendASCII("registry_controlled_domains") |
+                        .AppendASCII("effective_tld_names.gperf"); |
+  std::string actual_gperf_text; |
+  ASSERT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text)); |
+ |
+  // Keep only the body between the first and last "%%" separator lines. Fail |
+  // cleanly if a separator is missing rather than erasing with an npos offset. |
+  const std::string key = "%%\n"; |
+  const size_t header_end = actual_gperf_text.find(key); |
+  ASSERT_NE(std::string::npos, header_end); |
+  actual_gperf_text.erase(0, header_end + key.length()); |
+  const size_t footer_begin = actual_gperf_text.rfind(key); |
+  ASSERT_NE(std::string::npos, footer_begin); |
+  actual_gperf_text.erase(footer_begin); |
+ |
+  // |regenerated_gperf_text| should match the body of the .gperf file exactly. |
+  EXPECT_EQ(actual_gperf_text, regenerated_gperf_text); |
+ |
+  // Sanity check to prevent trivial success. |
+  EXPECT_GT(actual_gperf_text.length(), 30000U); |
+} |
+ |
} // namespace |
} // namespace net |