| Index: net/base/lookup_string_in_fixed_set_unittest.cc
|
| diff --git a/net/base/lookup_string_in_fixed_set_unittest.cc b/net/base/lookup_string_in_fixed_set_unittest.cc
|
| index 82e6c22d84be20d215c7667ea39ee9127d8f37ff..7a6f76b860fdf403c5ac74a3e3b85f6f2eb5afdb 100644
|
| --- a/net/base/lookup_string_in_fixed_set_unittest.cc
|
| +++ b/net/base/lookup_string_in_fixed_set_unittest.cc
|
| @@ -5,12 +5,26 @@
|
| #include "net/base/lookup_string_in_fixed_set.h"
|
|
|
| #include <string.h>
|
| +
|
| +#include <algorithm>
|
| +#include <limits>
|
| #include <ostream>
|
| +#include <utility>
|
| +#include <vector>
|
|
|
| +#include "base/base_paths.h"
|
| +#include "base/files/file_path.h"
|
| +#include "base/files/file_util.h"
|
| +#include "base/path_service.h"
|
| +#include "base/strings/string_util.h"
|
| +#include "base/strings/stringprintf.h"
|
| #include "testing/gtest/include/gtest/gtest.h"
|
|
|
| namespace net {
|
| namespace {
|
| +namespace effective_tld_names {
|
| +#include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
|
| +}
|
| namespace test1 {
|
| #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
|
| }
|
| @@ -56,6 +70,45 @@ const Expectation kBasicTestCases[] = {
|
| {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4},
|
| };
|
|
|
| +// Depth-first helper for EnumerateDafsaLanguage: appends to |language| every
|
| +// (string, result) pair reachable from |lookup|, using |sequence| as the prefix.
|
| +void RecursivelyEnumerateDafsaLanguage(
|
| +    const FixedSetIncrementalLookup& lookup,
|
| +    std::vector<char>* sequence,
|
| +    std::vector<std::pair<std::string, int>>* language) {
|
| +  const int current_result = lookup.GetResultForCurrentSequence();
|
| +  if (current_result != kDafsaNotFound) {
|
| +    language->emplace_back(std::string(sequence->begin(), sequence->end()),
|
| +                           current_result);
|
| +  }
|
| +  // Try appending each char value; an int counter avoids overflowing at max.
|
| +  for (int value = std::numeric_limits<char>::min();
|
| +       value <= std::numeric_limits<char>::max(); ++value) {
|
| +    FixedSetIncrementalLookup next_lookup = lookup;
|
| +    if (!next_lookup.Advance(static_cast<char>(value)))
|
| +      continue;
|
| +    sequence->push_back(static_cast<char>(value));
|
| +    size_t saved_language_size = language->size();
|
| +    RecursivelyEnumerateDafsaLanguage(next_lookup, sequence, language);
|
| +    CHECK_LT(saved_language_size, language->size())
|
| +        << "DAFSA includes a branch to nowhere at node: "
|
| +        << std::string(sequence->begin(), sequence->end());
|
| +    sequence->pop_back();
|
| +  }
|
| +}
|
| +
|
| +// Returns every (string, result) pair in the language of the DAFSA |graph|,
|
| +// collected by walking it with a FixedSetIncrementalLookup.
|
| +template <typename Graph>
|
| +std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage(
|
| +    const Graph& graph) {
|
| +  std::vector<std::pair<std::string, int>> accepted;
|
| +  std::vector<char> prefix;
|
| +  FixedSetIncrementalLookup root(graph, sizeof(Graph));
|
| +  RecursivelyEnumerateDafsaLanguage(root, &prefix, &accepted);
|
| +  return accepted;
|
| +}
|
| +
|
| INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
|
| Dafsa1Test,
|
| ::testing::ValuesIn(kBasicTestCases));
|
| @@ -158,5 +211,46 @@ INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest,
|
| Dafsa6Test,
|
| ::testing::ValuesIn(kJoinedSuffixesTestCases));
|
|
|
| +// Validates that the generated DAFSA contains exactly the same information as
|
| +// effective_tld_names.gperf.
|
| +TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) {
|
| +  // Rebuild the gperf body from the real effective TLD names DAFSA: sorted
|
| +  // lines (as in our .gperf files) of "<input-string>, <result-code-int>\n".
|
| +  auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa);
|
| +  std::vector<std::string> regenerated_gperf;
|
| +  for (const auto& rule : language) {
|
| +    regenerated_gperf.emplace_back(
|
| +        base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second));
|
| +  }
|
| +  std::sort(regenerated_gperf.begin(), regenerated_gperf.end());
|
| +  std::string regenerated_gperf_text =
|
| +      base::JoinString(regenerated_gperf, base::StringPiece());
|
| +
|
| +  // Read the .gperf file that effective_tld_names::kDafsa was generated from.
|
| +  // ASSERT rather than EXPECT: there is nothing to compare if this fails.
|
| +  base::FilePath gperf_file_path;
|
| +  ASSERT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path));
|
| +  gperf_file_path = gperf_file_path.AppendASCII("net")
|
| +                        .AppendASCII("base")
|
| +                        .AppendASCII("registry_controlled_domains")
|
| +                        .AppendASCII("effective_tld_names.gperf");
|
| +  std::string actual_gperf_text;
|
| +  ASSERT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text));
|
| +
|
| +  // Keep only the body between the first and last "%%\n". Assert both exist:
|
| +  // npos + length wraps around, and erase(npos) throws std::out_of_range.
|
| +  const std::string key = "%%\n";
|
| +  const size_t body_begin = actual_gperf_text.find(key);
|
| +  ASSERT_NE(std::string::npos, body_begin);
|
| +  actual_gperf_text.erase(0, body_begin + key.length());
|
| +  const size_t body_end = actual_gperf_text.rfind(key);
|
| +  ASSERT_NE(std::string::npos, body_end);
|
| +  actual_gperf_text.erase(body_end);
|
| +
|
| +  // |regenerated_gperf_text| should match the body of the .gperf file exactly.
|
| +  EXPECT_EQ(actual_gperf_text, regenerated_gperf_text);
|
| +  // Sanity check to prevent trivial success.
|
| +  EXPECT_GT(actual_gperf_text.length(), 30000U);
|
| +}
|
| +
|
| } // namespace
|
| } // namespace net
|
|
|