Chromium Code Reviews| Index: net/base/registry_controlled_domains/registry_controlled_domain.cc |
| diff --git a/net/base/registry_controlled_domains/registry_controlled_domain.cc b/net/base/registry_controlled_domains/registry_controlled_domain.cc |
| index 3777582812bb7103d4062d13a22a53ad342aac8e..d23847988624e00fb0fddee616941e2e10a191b5 100644 |
| --- a/net/base/registry_controlled_domains/registry_controlled_domain.cc |
| +++ b/net/base/registry_controlled_domains/registry_controlled_domain.cc |
| @@ -97,65 +97,54 @@ size_t GetRegistryLengthImpl(base::StringPiece host, |
| return 0; // Multiple trailing dots. |
| } |
| - // Walk up the domain tree, most specific to least specific, |
| - // looking for matches at each level. |
| - size_t prev_start = std::string::npos; |
| - size_t curr_start = host_check_begin; |
| - size_t next_dot = host.find('.', curr_start); |
| - if (next_dot >= host_check_len) // Catches std::string::npos as well. |
| - return 0; // This can't have a registry + domain. |
| - while (1) { |
| - const char* domain_str = host.data() + curr_start; |
| - size_t domain_length = host_check_len - curr_start; |
| - int type = LookupStringInFixedSet(g_graph, g_graph_length, domain_str, |
| - domain_length); |
| - bool do_check = type != kDafsaNotFound && |
| - (!(type & kDafsaPrivateRule) || |
| - private_filter == INCLUDE_PRIVATE_REGISTRIES); |
| - |
| - // If the apparent match is a private registry and we're not including |
| - // those, it can't be an actual match. |
| - if (do_check) { |
| - // Exception rules override wildcard rules when the domain is an exact |
| - // match, but wildcards take precedence when there's a subdomain. |
| - if (type & kDafsaWildcardRule && (prev_start != std::string::npos)) { |
| - // If prev_start == host_check_begin, then the host is the registry |
| - // itself, so return 0. |
| - return (prev_start == host_check_begin) ? 0 |
| - : (host.length() - prev_start); |
| - } |
| - |
| - if (type & kDafsaExceptionRule) { |
| - if (next_dot == std::string::npos) { |
| - // If we get here, we had an exception rule with no dots (e.g. |
| - // "!foo"). This would only be valid if we had a corresponding |
| - // wildcard rule, which would have to be "*". But we explicitly |
| - // disallow that case, so this kind of rule is invalid. |
| - NOTREACHED() << "Invalid exception rule"; |
| - return 0; |
| + // Feed |host| to the DAFSA in reverse character order. |
| + size_t curr_pos = host_check_len - 1; |
| + |
| + // If INCLUDE_UNKNOWN_REGISTRIES is true, we allow any initial label to be a |
| + // TLD. TODO(nick): is this right? Are there tests? What if a TLD appears |
| + // as part of a longer rule only? |
|
Ryan Sleevi
2017/01/25 22:13:25
"If no rules match, the prevailing rule is '*'"
I
ncarter (slow)
2017/01/26 00:33:02
Thanks for the clarification of the mapping betwee
Ryan Sleevi
2017/01/26 00:41:45
If INCLUDE_UNKNOWN_REGISTRIES is set, we return "u
|
| + size_t in_wildcard = (unknown_filter == INCLUDE_UNKNOWN_REGISTRIES); |
| + size_t prevailing_rule_pos = in_wildcard ? host_check_len + 1 : host.length(); |
| + FixedSetIncrementalLookup tld_reverse_lookup(g_graph, g_graph_length); |
| + while (curr_pos != host_check_begin - 1 && |
| + (tld_reverse_lookup.Advance(host[curr_pos]) || in_wildcard)) { |
| + // Check the return value whenever we're at the end of a label. |
| + if (curr_pos == host_check_begin || host[curr_pos - 1] == '.') { |
| + int dafsa_result = tld_reverse_lookup.GetResultForCurrentSequence(); |
| + if (dafsa_result != kDafsaNotFound && |
| + ((dafsa_result & kDafsaPrivateRule) == 0 || |
| + private_filter == INCLUDE_PRIVATE_REGISTRIES)) { |
| + if (dafsa_result & kDafsaExceptionRule) { |
| + // Exception rules always win. |
| + // TODO(nick): The old code talks about wildcards trumping when |
| + // there's a subdomain. Are there unittests of that behavior? |
|
ncarter (slow)
2017/01/26 00:33:02
This TODO is about a concern I have with equivalen
Ryan Sleevi
2017/01/26 00:41:45
Yeah, I'm trying to figure out from the PSL mainta
ncarter (slow)
2017/02/27 23:22:04
Shall I add a test for this case? It doesn't appea
|
| + size_t previous_dot = host.find('.', curr_pos); |
| + if (previous_dot == std::string::npos) { |
| + // If we get here, we had an exception rule with no dots (e.g. |
| + // "!foo"). This would only be valid if we had a corresponding |
| + // wildcard rule, which would have to be "*". But we explicitly |
| + // disallow that case, so this kind of rule is invalid. |
| + NOTREACHED() << "Invalid exception rule"; |
| + return 0; |
| + } |
| + DCHECK(in_wildcard); |
| + DCHECK_EQ(previous_dot + 1, prevailing_rule_pos); |
| + return host.length() - previous_dot - 1; |
| } |
| - return host.length() - next_dot - 1; |
| + in_wildcard = (dafsa_result & kDafsaWildcardRule) != 0; |
| + prevailing_rule_pos = curr_pos; |
| + } else if (in_wildcard) { |
| + in_wildcard = false; |
| + prevailing_rule_pos = curr_pos; |
| } |
| - |
| - // If curr_start == host_check_begin, then the host is the registry |
| - // itself, so return 0. |
| - return (curr_start == host_check_begin) ? 0 |
| - : (host.length() - curr_start); |
| } |
| - |
| - if (next_dot >= host_check_len) // Catches std::string::npos as well. |
| - break; |
| - |
| - prev_start = curr_start; |
| - curr_start = next_dot + 1; |
| - next_dot = host.find('.', curr_start); |
| + --curr_pos; |
| } |
| + // If the entire hostname is registry-controlled, fail. |
| + if (in_wildcard || prevailing_rule_pos == host_check_begin) |
| + return 0; |
| - // No rule found in the registry. curr_start now points to the first |
| - // character of the last subcomponent of the host, so if we allow unknown |
| - // registries, return the length of this subcomponent. |
| - return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ? |
| - (host.length() - curr_start) : 0; |
| + return host.length() - prevailing_rule_pos; |
| } |
| base::StringPiece GetDomainAndRegistryImpl( |
| @@ -177,8 +166,8 @@ base::StringPiece GetDomainAndRegistryImpl( |
| return base::StringPiece(); |
| } |
| - // Move past the dot preceding the registry, and search for the next previous |
| - // dot. Return the host from after that dot, or the whole host when there is |
| + // Move past the dot preceding the registry, and search for the dot before |
| + // that. Return the host from after that dot, or the whole host when there is |
| // no dot. |
| const size_t dot = host.rfind('.', host.length() - registry_length - 2); |
| if (dot == std::string::npos) |