Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(680)

Unified Diff: net/tools/tld_cleanup/tld_cleanup_util.cc

Issue 992733002: Remove //net (except for Android test stuff) and sdch (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « net/tools/tld_cleanup/tld_cleanup_util.h ('k') | net/tools/tld_cleanup/tld_cleanup_util_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/tools/tld_cleanup/tld_cleanup_util.cc
diff --git a/net/tools/tld_cleanup/tld_cleanup_util.cc b/net/tools/tld_cleanup/tld_cleanup_util.cc
deleted file mode 100644
index 8cf2323fdd4e100e9aa1f9298d44801a02591bde..0000000000000000000000000000000000000000
--- a/net/tools/tld_cleanup/tld_cleanup_util.cc
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "net/tools/tld_cleanup/tld_cleanup_util.h"
-
-#include "base/files/file_util.h"
-#include "base/logging.h"
-#include "base/strings/string_number_conversions.h"
-#include "base/strings/string_util.h"
-#include "url/gurl.h"
-#include "url/url_parse.h"
-
-namespace {
-
-const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS===";
-const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS===";
-
-const int kExceptionRule = 1;
-const int kWildcardRule = 2;
-const int kPrivateRule = 4;
-}
-
-namespace net {
-namespace tld_cleanup {
-
-// Writes the list of domain rules contained in the 'rules' set to the
-// 'outfile', with each rule terminated by a LF. The file must already have
-// been created with write access.
-bool WriteRules(const RuleMap& rules, const base::FilePath& outfile) {
- std::string data;
- data.append("%{\n"
- "// Copyright 2012 The Chromium Authors. All rights reserved.\n"
- "// Use of this source code is governed by a BSD-style license "
- "that can be\n"
- "// found in the LICENSE file.\n\n"
- "// This file is generated by net/tools/tld_cleanup/.\n"
- "// DO NOT MANUALLY EDIT!\n"
- "%}\n"
- "struct DomainRule {\n"
- " int name_offset;\n"
- " int type; // flags: 1: exception, 2: wildcard, 4: private\n"
- "};\n"
- "%%\n");
-
- for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) {
- data.append(i->first);
- data.append(", ");
- int type = 0;
- if (i->second.exception) {
- type = kExceptionRule;
- } else if (i->second.wildcard) {
- type = kWildcardRule;
- }
- if (i->second.is_private) {
- type += kPrivateRule;
- }
- data.append(base::IntToString(type));
- data.append("\n");
- }
-
- data.append("%%\n");
-
- int written = base::WriteFile(outfile,
- data.data(),
- static_cast<int>(data.size()));
-
- return written == static_cast<int>(data.size());
-}
-
-// Adjusts the rule to a standard form: removes single extraneous dots and
-// canonicalizes it using GURL. Returns kSuccess if the rule is interpreted as
-// valid; logs a warning and returns kWarning if it is probably invalid; and
-// logs an error and returns kError if the rule is (almost) certainly invalid.
-NormalizeResult NormalizeRule(std::string* domain, Rule* rule) {
- NormalizeResult result = kSuccess;
-
- // Strip single leading and trailing dots.
- if (domain->at(0) == '.')
- domain->erase(0, 1);
- if (domain->empty()) {
- LOG(WARNING) << "Ignoring empty rule";
- return kWarning;
- }
- if (domain->at(domain->size() - 1) == '.')
- domain->erase(domain->size() - 1, 1);
- if (domain->empty()) {
- LOG(WARNING) << "Ignoring empty rule";
- return kWarning;
- }
-
- // Allow single leading '*.' or '!', saved here so it's not canonicalized.
- size_t start_offset = 0;
- if (domain->at(0) == '!') {
- domain->erase(0, 1);
- rule->exception = true;
- } else if (domain->find("*.") == 0) {
- domain->erase(0, 2);
- rule->wildcard = true;
- }
- if (domain->empty()) {
- LOG(WARNING) << "Ignoring empty rule";
- return kWarning;
- }
-
- // Warn about additional '*.' or '!'.
- if (domain->find("*.", start_offset) != std::string::npos ||
- domain->find('!', start_offset) != std::string::npos) {
- LOG(WARNING) << "Keeping probably invalid rule: " << *domain;
- result = kWarning;
- }
-
- // Make a GURL and normalize it, then get the host back out.
- std::string url = "http://";
- url.append(*domain);
- GURL gurl(url);
- const std::string& spec = gurl.possibly_invalid_spec();
- url::Component host = gurl.parsed_for_possibly_invalid_spec().host;
- if (host.len < 0) {
- LOG(ERROR) << "Ignoring rule that couldn't be normalized: " << *domain;
- return kError;
- }
- if (!gurl.is_valid()) {
- LOG(WARNING) << "Keeping rule that GURL says is invalid: " << *domain;
- result = kWarning;
- }
- domain->assign(spec.substr(host.begin, host.len));
-
- return result;
-}
-
-NormalizeResult NormalizeDataToRuleMap(const std::string data,
- RuleMap* rules) {
- CHECK(rules);
- // We do a lot of string assignment during parsing, but simplicity is more
- // important than performance here.
- std::string domain;
- NormalizeResult result = kSuccess;
- size_t line_start = 0;
- size_t line_end = 0;
- bool is_private = false;
- RuleMap extra_rules;
- int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1;
- int end_private_length = arraysize(kEndPrivateDomainsComment) - 1;
- while (line_start < data.size()) {
- if (line_start + begin_private_length < data.size() &&
- !data.compare(line_start, begin_private_length,
- kBeginPrivateDomainsComment)) {
- is_private = true;
- line_end = line_start + begin_private_length;
- } else if (line_start + end_private_length < data.size() &&
- !data.compare(line_start, end_private_length,
- kEndPrivateDomainsComment)) {
- is_private = false;
- line_end = line_start + end_private_length;
- } else if (line_start + 1 < data.size() &&
- data[line_start] == '/' &&
- data[line_start + 1] == '/') {
- // Skip comments.
- line_end = data.find_first_of("\r\n", line_start);
- if (line_end == std::string::npos)
- line_end = data.size();
- } else {
- // Truncate at first whitespace.
- line_end = data.find_first_of("\r\n \t", line_start);
- if (line_end == std::string::npos)
- line_end = data.size();
- domain.assign(data.data(), line_start, line_end - line_start);
-
- Rule rule;
- rule.wildcard = false;
- rule.exception = false;
- rule.is_private = is_private;
- NormalizeResult new_result = NormalizeRule(&domain, &rule);
- if (new_result != kError) {
- // Check the existing rules to make sure we don't have an exception and
- // wildcard for the same rule, or that the same domain is listed as both
- // private and not private. If we did, we'd have to update our
- // parsing code to handle this case.
- CHECK(rules->find(domain) == rules->end())
- << "Duplicate rule found for " << domain;
-
- (*rules)[domain] = rule;
- // Add true TLD for multi-level rules. We don't add them right now, in
- // case there's an exception or wild card that either exists or might be
- // added in a later iteration. In those cases, there's no need to add
- // it and it would just slow down parsing the data.
- size_t tld_start = domain.find_last_of('.');
- if (tld_start != std::string::npos && tld_start + 1 < domain.size()) {
- std::string extra_rule_domain = domain.substr(tld_start + 1);
- RuleMap::const_iterator iter = extra_rules.find(extra_rule_domain);
- Rule extra_rule;
- extra_rule.exception = false;
- extra_rule.wildcard = false;
- if (iter == extra_rules.end()) {
- extra_rule.is_private = is_private;
- } else {
- // A rule already exists, so we ensure that if any of the entries is
- // not private the result should be that the entry is not private.
- // An example is .au which is not listed as a real TLD, but only
- // lists second-level domains such as com.au. Subdomains of .au
- // (eg. blogspot.com.au) are also listed in the private section,
- // which is processed later, so this ensures that the real TLD
- // (eg. .au) is listed as public.
- extra_rule.is_private = is_private && iter->second.is_private;
- }
- extra_rules[extra_rule_domain] = extra_rule;
- }
- }
- result = std::max(result, new_result);
- }
-
- // Find beginning of next non-empty line.
- line_start = data.find_first_of("\r\n", line_end);
- if (line_start == std::string::npos)
- line_start = data.size();
- line_start = data.find_first_not_of("\r\n", line_start);
- if (line_start == std::string::npos)
- line_start = data.size();
- }
-
- for (RuleMap::const_iterator iter = extra_rules.begin();
- iter != extra_rules.end();
- ++iter) {
- if (rules->find(iter->first) == rules->end()) {
- (*rules)[iter->first] = iter->second;
- }
- }
-
- return result;
-}
-
-NormalizeResult NormalizeFile(const base::FilePath& in_filename,
- const base::FilePath& out_filename) {
- RuleMap rules;
- std::string data;
- if (!base::ReadFileToString(in_filename, &data)) {
- LOG(ERROR) << "Unable to read file";
- // We return success since we've already reported the error.
- return kSuccess;
- }
-
- NormalizeResult result = NormalizeDataToRuleMap(data, &rules);
-
- if (!WriteRules(rules, out_filename)) {
- LOG(ERROR) << "Error(s) writing output file";
- result = kError;
- }
-
- return result;
-}
-
-
-} // namespace tld_cleanup
-} // namespace net
« no previous file with comments | « net/tools/tld_cleanup/tld_cleanup_util.h ('k') | net/tools/tld_cleanup/tld_cleanup_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698