Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(683)

Unified Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 2649033004: [3 of 4] Speedup GetRegistryLengthImpl() by seeding the DAFSA in reverse
Patch Set: rebase Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « net/base/registry_controlled_domains/BUILD.gn ('k') | net/tools/dafsa/make_dafsa.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/base/registry_controlled_domains/registry_controlled_domain.cc
diff --git a/net/base/registry_controlled_domains/registry_controlled_domain.cc b/net/base/registry_controlled_domains/registry_controlled_domain.cc
index 3777582812bb7103d4062d13a22a53ad342aac8e..d23847988624e00fb0fddee616941e2e10a191b5 100644
--- a/net/base/registry_controlled_domains/registry_controlled_domain.cc
+++ b/net/base/registry_controlled_domains/registry_controlled_domain.cc
@@ -97,65 +97,54 @@ size_t GetRegistryLengthImpl(base::StringPiece host,
return 0; // Multiple trailing dots.
}
- // Walk up the domain tree, most specific to least specific,
- // looking for matches at each level.
- size_t prev_start = std::string::npos;
- size_t curr_start = host_check_begin;
- size_t next_dot = host.find('.', curr_start);
- if (next_dot >= host_check_len) // Catches std::string::npos as well.
- return 0; // This can't have a registry + domain.
- while (1) {
- const char* domain_str = host.data() + curr_start;
- size_t domain_length = host_check_len - curr_start;
- int type = LookupStringInFixedSet(g_graph, g_graph_length, domain_str,
- domain_length);
- bool do_check = type != kDafsaNotFound &&
- (!(type & kDafsaPrivateRule) ||
- private_filter == INCLUDE_PRIVATE_REGISTRIES);
-
- // If the apparent match is a private registry and we're not including
- // those, it can't be an actual match.
- if (do_check) {
- // Exception rules override wildcard rules when the domain is an exact
- // match, but wildcards take precedence when there's a subdomain.
- if (type & kDafsaWildcardRule && (prev_start != std::string::npos)) {
- // If prev_start == host_check_begin, then the host is the registry
- // itself, so return 0.
- return (prev_start == host_check_begin) ? 0
- : (host.length() - prev_start);
- }
-
- if (type & kDafsaExceptionRule) {
- if (next_dot == std::string::npos) {
- // If we get here, we had an exception rule with no dots (e.g.
- // "!foo"). This would only be valid if we had a corresponding
- // wildcard rule, which would have to be "*". But we explicitly
- // disallow that case, so this kind of rule is invalid.
- NOTREACHED() << "Invalid exception rule";
- return 0;
+ // Feed |host| to the DAFSA in reverse character order.
+ size_t curr_pos = host_check_len - 1;
+
+ // If INCLUDE_UNKNOWN_REGISTRIES is true, we allow any initial label to be a
+ // TLD. TODO(nick): is this right? Are there tests? What if a TLD appears
+ // as part of a longer rule only?
Ryan Sleevi 2017/01/25 22:13:25 "If no rules match, the prevailing rule is '*'" I
ncarter (slow) 2017/01/26 00:33:02 Thanks for the clarification of the mapping betwee
Ryan Sleevi 2017/01/26 00:41:45 If INCLUDE_UNKNOWN_REGISTRIES is set, we return "u
+ size_t in_wildcard = (unknown_filter == INCLUDE_UNKNOWN_REGISTRIES);
+ size_t prevailing_rule_pos = in_wildcard ? host_check_len + 1 : host.length();
+ FixedSetIncrementalLookup tld_reverse_lookup(g_graph, g_graph_length);
+ while (curr_pos != host_check_begin - 1 &&
+ (tld_reverse_lookup.Advance(host[curr_pos]) || in_wildcard)) {
+ // Check the return value whenever we're at the end of a label.
+ if (curr_pos == host_check_begin || host[curr_pos - 1] == '.') {
+ int dafsa_result = tld_reverse_lookup.GetResultForCurrentSequence();
+ if (dafsa_result != kDafsaNotFound &&
+ ((dafsa_result & kDafsaPrivateRule) == 0 ||
+ private_filter == INCLUDE_PRIVATE_REGISTRIES)) {
+ if (dafsa_result & kDafsaExceptionRule) {
+ // Exception rules always win.
+ // TODO(nick): The old code talks about wildcards trumping when
+ // there's a subdomain. Are there unittests of that behavior?
ncarter (slow) 2017/01/26 00:33:02 This TODO is about a concern I have with equivalen
Ryan Sleevi 2017/01/26 00:41:45 Yeah, I'm trying to figure out from the PSL mainta
ncarter (slow) 2017/02/27 23:22:04 Shall I add a test for this case? It doesn't appea
+ size_t previous_dot = host.find('.', curr_pos);
+ if (previous_dot == std::string::npos) {
+ // If we get here, we had an exception rule with no dots (e.g.
+ // "!foo"). This would only be valid if we had a corresponding
+ // wildcard rule, which would have to be "*". But we explicitly
+ // disallow that case, so this kind of rule is invalid.
+ NOTREACHED() << "Invalid exception rule";
+ return 0;
+ }
+ DCHECK(in_wildcard);
+ DCHECK_EQ(previous_dot + 1, prevailing_rule_pos);
+ return host.length() - previous_dot - 1;
}
- return host.length() - next_dot - 1;
+ in_wildcard = (dafsa_result & kDafsaWildcardRule) != 0;
+ prevailing_rule_pos = curr_pos;
+ } else if (in_wildcard) {
+ in_wildcard = false;
+ prevailing_rule_pos = curr_pos;
}
-
- // If curr_start == host_check_begin, then the host is the registry
- // itself, so return 0.
- return (curr_start == host_check_begin) ? 0
- : (host.length() - curr_start);
}
-
- if (next_dot >= host_check_len) // Catches std::string::npos as well.
- break;
-
- prev_start = curr_start;
- curr_start = next_dot + 1;
- next_dot = host.find('.', curr_start);
+ --curr_pos;
}
+ // If the entire hostname is registry-controlled, fail.
+ if (in_wildcard || prevailing_rule_pos == host_check_begin)
+ return 0;
- // No rule found in the registry. curr_start now points to the first
- // character of the last subcomponent of the host, so if we allow unknown
- // registries, return the length of this subcomponent.
- return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ?
- (host.length() - curr_start) : 0;
+ return host.length() - prevailing_rule_pos;
}
base::StringPiece GetDomainAndRegistryImpl(
@@ -177,8 +166,8 @@ base::StringPiece GetDomainAndRegistryImpl(
return base::StringPiece();
}
- // Move past the dot preceding the registry, and search for the next previous
- // dot. Return the host from after that dot, or the whole host when there is
+ // Move past the dot preceding the registry, and search for the dot before
+ // that. Return the host from after that dot, or the whole host when there is
// no dot.
const size_t dot = host.rfind('.', host.length() - registry_length - 2);
if (dot == std::string::npos)
« no previous file with comments | « net/base/registry_controlled_domains/BUILD.gn ('k') | net/tools/dafsa/make_dafsa.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698