Chromium Code Reviews| Index: net/base/lookup_string_in_fixed_set_unittest.cc |
| diff --git a/net/base/lookup_string_in_fixed_set_unittest.cc b/net/base/lookup_string_in_fixed_set_unittest.cc |
| index 82e6c22d84be20d215c7667ea39ee9127d8f37ff..a97b031141d929fb5148ef7ca580ff119c14b3c3 100644 |
| --- a/net/base/lookup_string_in_fixed_set_unittest.cc |
| +++ b/net/base/lookup_string_in_fixed_set_unittest.cc |
| @@ -5,12 +5,26 @@ |
| #include "net/base/lookup_string_in_fixed_set.h" |
| #include <string.h> |
| -#include <ostream> |
| +#include <algorithm> |
| +#include <limits> |
| +#include <ostream> |
| +#include <utility> |
| +#include <vector> |
| + |
| +#include "base/base_paths.h" |
| +#include "base/files/file_path.h" |
| +#include "base/files/file_util.h" |
| +#include "base/path_service.h" |
| +#include "base/strings/string_util.h" |
| +#include "base/strings/stringprintf.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| namespace net { |
| namespace { |
| +namespace effective_tld_names { |
| +#include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" |
| +} |
| namespace test1 { |
| #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc" |
| } |
| @@ -56,6 +70,44 @@ const Expectation kBasicTestCases[] = { |
| {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, |
| }; |
| +// Helper function for EnumerateDafsaLanguage. Walks the DAFSA depth-first, |
| +// starting from the state held by |lookup|. |sequence| holds the characters |
| +// consumed so far; whenever the current state yields a result other than |
| +// kDafsaNotFound, the (sequence, result) pair is appended to |language|. |
| +void RecursivelyEnumerateDafsaLanguage( |
| +    const FixedSetIncrementalLookup& lookup, |
| +    std::string* sequence, |
| +    std::vector<std::pair<std::string, int>>* language) { |
| +  const int result_code = lookup.GetResultForCurrentSequence(); |
| +  if (result_code != kDafsaNotFound) |
| +    language->emplace_back(*sequence, result_code); |
| + |
| +  // Try extending the sequence with every possible char value. The counter is |
| +  // an int so that it can step past numeric_limits<char>::max() and end the |
| +  // loop without overflowing a char. |
| +  const int first_char = std::numeric_limits<char>::min(); |
| +  const int last_char = std::numeric_limits<char>::max(); |
| +  for (int code = first_char; code <= last_char; ++code) { |
| +    const char next = static_cast<char>(code); |
| +    // Advance a copy so that |lookup| can be reused for the next candidate. |
| +    FixedSetIncrementalLookup branch = lookup; |
| +    if (!branch.Advance(next)) |
| +      continue; |
| + |
| +    sequence->push_back(next); |
| +    const size_t results_before = language->size(); |
| +    RecursivelyEnumerateDafsaLanguage(branch, sequence, language); |
| +    // Each accepted transition must contribute at least one result. |
| +    CHECK_LT(results_before, language->size()) |
| +        << "DAFSA includes a branch to nowhere at node: " << *sequence; |
| +    sequence->pop_back(); |
| +  } |
| +} |
| + |
| +// Builds and returns a vector holding every string in the language of the |
| +// DAFSA |graph|, each paired with its result code, by exhaustively driving a |
| +// FixedSetIncrementalLookup over it. |graph| is expected to be the DAFSA byte |
| +// array itself, since sizeof(Graph) is passed as its length. |
| +template <typename Graph> |
| +std::vector<std::pair<std::string, int>> EnumerateDafsaLanguage( |
| +    const Graph& graph) { |
| +  std::vector<std::pair<std::string, int>> recognized; |
| +  std::string prefix; |
| +  RecursivelyEnumerateDafsaLanguage( |
| +      FixedSetIncrementalLookup(graph, sizeof(Graph)), &prefix, &recognized); |
| +  return recognized; |
| +} |
| + |
| INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
| Dafsa1Test, |
| ::testing::ValuesIn(kBasicTestCases)); |
| @@ -158,5 +210,46 @@ INSTANTIATE_TEST_CASE_P(LookupStringInFixedSetTest, |
| Dafsa6Test, |
| ::testing::ValuesIn(kJoinedSuffixesTestCases)); |
| +// Validates that the generated DAFSA contains exactly the same information as |
| +// effective_tld_names.gperf. |
|
Ryan Sleevi
2017/01/25 19:11:04
So in general, I try to push back on these "Test a
ncarter (slow)
2017/01/26 23:29:11
Regarding coupling: these unittests are already wr
Ryan Sleevi
2017/01/27 00:08:24
Yeah, this used to be an implementation detail of
ncarter (slow)
2017/02/15 23:42:11
I've redone these unittests so that they just run
|
| +TEST(LookupStringInFixedSetTest, TestDafsaLanguageMatchesGperfFile) { |
| +  // This test runs on the real effective TLD names file. |
| +  // |
| +  // First, enumerate every character sequence and result code that the DAFSA |
| +  // recognizes. |
| +  auto language = EnumerateDafsaLanguage(effective_tld_names::kDafsa); |
| + |
| +  // Render each entry in the gperf format: "<input-string>, <result-code>\n". |
| +  std::vector<std::string> regenerated_gperf; |
| +  regenerated_gperf.reserve(language.size()); |
| +  for (const auto& rule : language) { |
| +    regenerated_gperf.emplace_back( |
| +        base::StringPrintf("%s, %d\n", rule.first.c_str(), rule.second)); |
| +  } |
| +  // Our .gperf files happen to be sorted, so mimic that. |
| +  std::sort(regenerated_gperf.begin(), regenerated_gperf.end()); |
| + |
| +  std::string regenerated_gperf_text = |
| +      base::JoinString(regenerated_gperf, base::StringPiece()); |
| + |
| +  // Second, read the source .gperf file into memory (this is the file from |
| +  // which effective_tld_names::kDafsa was generated). Failures here are fatal: |
| +  // nothing below is meaningful without the file contents. |
| +  base::FilePath gperf_file_path; |
| +  ASSERT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &gperf_file_path)); |
| +  gperf_file_path = gperf_file_path.AppendASCII("net") |
| +                        .AppendASCII("base") |
| +                        .AppendASCII("registry_controlled_domains") |
| +                        .AppendASCII("effective_tld_names.gperf"); |
| +  std::string actual_gperf_text; |
| +  ASSERT_TRUE(base::ReadFileToString(gperf_file_path, &actual_gperf_text)); |
| + |
| +  // The body of a .gperf file sits between two "%%\n" delimiter lines. Locate |
| +  // both explicitly; a missing delimiter would otherwise make find()/rfind() |
| +  // return npos and the erase() calls operate on bogus offsets. |
| +  const std::string key = "%%\n"; |
| +  const std::string::size_type header_end = actual_gperf_text.find(key); |
| +  ASSERT_NE(std::string::npos, header_end) |
| +      << "Opening %% delimiter not found in .gperf file"; |
| +  const std::string::size_type body_begin = header_end + key.length(); |
| +  const std::string::size_type body_end = actual_gperf_text.rfind(key); |
| +  ASSERT_NE(std::string::npos, body_end); |
| +  ASSERT_GE(body_end, body_begin) |
| +      << "Closing %% delimiter not found in .gperf file"; |
| +  // Drop the trailer first so that |body_begin| stays valid. |
| +  actual_gperf_text.erase(body_end); |
| +  actual_gperf_text.erase(0, body_begin); |
| + |
| +  // |regenerated_gperf_text| should match the body of the .gperf file exactly. |
| +  EXPECT_EQ(actual_gperf_text, regenerated_gperf_text); |
| + |
| +  // Sanity check to prevent trivial success. |
| +  EXPECT_GT(actual_gperf_text.length(), 30000U); |
| +} |
| + |
| } // namespace |
| } // namespace net |