Index: net/base/registry_controlled_domains/registry_controlled_domain.cc |
diff --git a/net/base/registry_controlled_domains/registry_controlled_domain.cc b/net/base/registry_controlled_domains/registry_controlled_domain.cc |
index 3777582812bb7103d4062d13a22a53ad342aac8e..d23847988624e00fb0fddee616941e2e10a191b5 100644 |
--- a/net/base/registry_controlled_domains/registry_controlled_domain.cc |
+++ b/net/base/registry_controlled_domains/registry_controlled_domain.cc |
@@ -97,65 +97,54 @@ size_t GetRegistryLengthImpl(base::StringPiece host, |
return 0; // Multiple trailing dots. |
} |
- // Walk up the domain tree, most specific to least specific, |
- // looking for matches at each level. |
- size_t prev_start = std::string::npos; |
- size_t curr_start = host_check_begin; |
- size_t next_dot = host.find('.', curr_start); |
- if (next_dot >= host_check_len) // Catches std::string::npos as well. |
- return 0; // This can't have a registry + domain. |
- while (1) { |
- const char* domain_str = host.data() + curr_start; |
- size_t domain_length = host_check_len - curr_start; |
- int type = LookupStringInFixedSet(g_graph, g_graph_length, domain_str, |
- domain_length); |
- bool do_check = type != kDafsaNotFound && |
- (!(type & kDafsaPrivateRule) || |
- private_filter == INCLUDE_PRIVATE_REGISTRIES); |
- |
- // If the apparent match is a private registry and we're not including |
- // those, it can't be an actual match. |
- if (do_check) { |
- // Exception rules override wildcard rules when the domain is an exact |
- // match, but wildcards take precedence when there's a subdomain. |
- if (type & kDafsaWildcardRule && (prev_start != std::string::npos)) { |
- // If prev_start == host_check_begin, then the host is the registry |
- // itself, so return 0. |
- return (prev_start == host_check_begin) ? 0 |
- : (host.length() - prev_start); |
- } |
- |
- if (type & kDafsaExceptionRule) { |
- if (next_dot == std::string::npos) { |
- // If we get here, we had an exception rule with no dots (e.g. |
- // "!foo"). This would only be valid if we had a corresponding |
- // wildcard rule, which would have to be "*". But we explicitly |
- // disallow that case, so this kind of rule is invalid. |
- NOTREACHED() << "Invalid exception rule"; |
- return 0; |
+ // Feed |host| to the DAFSA in reverse character order. |
+ size_t curr_pos = host_check_len - 1; |
+ |
+ // If INCLUDE_UNKNOWN_REGISTRIES is true, we allow any initial label to be a |
+ // TLD. TODO(nick): is this right? Are there tests? What if a TLD appears |
+ // as part of a longer rule only? |
Ryan Sleevi
2017/01/25 22:13:25
"If no rules match, the prevailing rule is '*'"
I
ncarter (slow)
2017/01/26 00:33:02
Thanks for the clarification of the mapping betwee
Ryan Sleevi
2017/01/26 00:41:45
If INCLUDE_UNKNOWN_REGISTRIES is set, we return "u
|
+ size_t in_wildcard = (unknown_filter == INCLUDE_UNKNOWN_REGISTRIES); |
+ size_t prevailing_rule_pos = in_wildcard ? host_check_len + 1 : host.length(); |
+ FixedSetIncrementalLookup tld_reverse_lookup(g_graph, g_graph_length); |
+ while (curr_pos != host_check_begin - 1 && |
+ (tld_reverse_lookup.Advance(host[curr_pos]) || in_wildcard)) { |
+ // Check the return value whenever we're at the end of a label. |
+ if (curr_pos == host_check_begin || host[curr_pos - 1] == '.') { |
+ int dafsa_result = tld_reverse_lookup.GetResultForCurrentSequence(); |
+ if (dafsa_result != kDafsaNotFound && |
+ ((dafsa_result & kDafsaPrivateRule) == 0 || |
+ private_filter == INCLUDE_PRIVATE_REGISTRIES)) { |
+ if (dafsa_result & kDafsaExceptionRule) { |
+ // Exception rules always win. |
+ // TODO(nick): The old code talks about wildcards trumping when |
+ // there's a subdomain. Are there unittests of that behavior? |
ncarter (slow)
2017/01/26 00:33:02
This TODO is about a concern I have with equivalen
Ryan Sleevi
2017/01/26 00:41:45
Yeah, I'm trying to figure out from the PSL mainta
ncarter (slow)
2017/02/27 23:22:04
Shall I add a test for this case? It doesn't appea
|
+ size_t previous_dot = host.find('.', curr_pos); |
+ if (previous_dot == std::string::npos) { |
+ // If we get here, we had an exception rule with no dots (e.g. |
+ // "!foo"). This would only be valid if we had a corresponding |
+ // wildcard rule, which would have to be "*". But we explicitly |
+ // disallow that case, so this kind of rule is invalid. |
+ NOTREACHED() << "Invalid exception rule"; |
+ return 0; |
+ } |
+ DCHECK(in_wildcard); |
+ DCHECK_EQ(previous_dot + 1, prevailing_rule_pos); |
+ return host.length() - previous_dot - 1; |
} |
- return host.length() - next_dot - 1; |
+ in_wildcard = (dafsa_result & kDafsaWildcardRule) != 0; |
+ prevailing_rule_pos = curr_pos; |
+ } else if (in_wildcard) { |
+ in_wildcard = false; |
+ prevailing_rule_pos = curr_pos; |
} |
- |
- // If curr_start == host_check_begin, then the host is the registry |
- // itself, so return 0. |
- return (curr_start == host_check_begin) ? 0 |
- : (host.length() - curr_start); |
} |
- |
- if (next_dot >= host_check_len) // Catches std::string::npos as well. |
- break; |
- |
- prev_start = curr_start; |
- curr_start = next_dot + 1; |
- next_dot = host.find('.', curr_start); |
+ --curr_pos; |
} |
+ // If the entire hostname is registry-controlled, fail. |
+ if (in_wildcard || prevailing_rule_pos == host_check_begin) |
+ return 0; |
- // No rule found in the registry. curr_start now points to the first |
- // character of the last subcomponent of the host, so if we allow unknown |
- // registries, return the length of this subcomponent. |
- return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ? |
- (host.length() - curr_start) : 0; |
+ return host.length() - prevailing_rule_pos; |
} |
base::StringPiece GetDomainAndRegistryImpl( |
@@ -177,8 +166,8 @@ base::StringPiece GetDomainAndRegistryImpl( |
return base::StringPiece(); |
} |
- // Move past the dot preceding the registry, and search for the next previous |
- // dot. Return the host from after that dot, or the whole host when there is |
+ // Move past the dot preceding the registry, and search for the dot before |
+ // that. Return the host from after that dot, or the whole host when there is |
// no dot. |
const size_t dot = host.rfind('.', host.length() - registry_length - 2); |
if (dot == std::string::npos) |