Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1374)

Side by Side Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 2649033004: [3 of 4] Speed up GetRegistryLengthImpl() by seeding the DAFSA in reverse
Patch Set: rebase Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/base/registry_controlled_domains/BUILD.gn ('k') | net/tools/dafsa/make_dafsa.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene,
6 // later modified by others), but almost entirely rewritten for Chrome. 6 // later modified by others), but almost entirely rewritten for Chrome.
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp)
8 /* ***** BEGIN LICENSE BLOCK ***** 8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 * 10 *
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
90 // to be included in the final returned length. 90 // to be included in the final returned length.
91 size_t host_check_len = host.length(); 91 size_t host_check_len = host.length();
92 if (host[host_check_len - 1] == '.') { 92 if (host[host_check_len - 1] == '.') {
93 --host_check_len; 93 --host_check_len;
94 DCHECK(host_check_len > 0); // If this weren't true, the host would be ".", 94 DCHECK(host_check_len > 0); // If this weren't true, the host would be ".",
95 // and we'd have already returned above. 95 // and we'd have already returned above.
96 if (host[host_check_len - 1] == '.') 96 if (host[host_check_len - 1] == '.')
97 return 0; // Multiple trailing dots. 97 return 0; // Multiple trailing dots.
98 } 98 }
99 99
100 // Walk up the domain tree, most specific to least specific, 100 // Feed |host| to the DAFSA in reverse character order.
101 // looking for matches at each level. 101 size_t curr_pos = host_check_len - 1;
102 size_t prev_start = std::string::npos;
103 size_t curr_start = host_check_begin;
104 size_t next_dot = host.find('.', curr_start);
105 if (next_dot >= host_check_len) // Catches std::string::npos as well.
106 return 0; // This can't have a registry + domain.
107 while (1) {
108 const char* domain_str = host.data() + curr_start;
109 size_t domain_length = host_check_len - curr_start;
110 int type = LookupStringInFixedSet(g_graph, g_graph_length, domain_str,
111 domain_length);
112 bool do_check = type != kDafsaNotFound &&
113 (!(type & kDafsaPrivateRule) ||
114 private_filter == INCLUDE_PRIVATE_REGISTRIES);
115 102
116 // If the apparent match is a private registry and we're not including 103 // If INCLUDE_UNKNOWN_REGISTRIES is true, we allow any initial label to be a
117 // those, it can't be an actual match. 104 // TLD. TODO(nick): is this right? Are there tests? What if a TLD appears
118 if (do_check) { 105 // as part of a longer rule only?
Ryan Sleevi 2017/01/25 22:13:25 "If no rules match, the prevailing rule is '*'" I
ncarter (slow) 2017/01/26 00:33:02 Thanks for the clarification of the mapping betwee
Ryan Sleevi 2017/01/26 00:41:45 If INCLUDE_UNKNOWN_REGISTRIES is set, we return "u
119 // Exception rules override wildcard rules when the domain is an exact 106 size_t in_wildcard = (unknown_filter == INCLUDE_UNKNOWN_REGISTRIES);
120 // match, but wildcards take precedence when there's a subdomain. 107 size_t prevailing_rule_pos = in_wildcard ? host_check_len + 1 : host.length();
121 if (type & kDafsaWildcardRule && (prev_start != std::string::npos)) { 108 FixedSetIncrementalLookup tld_reverse_lookup(g_graph, g_graph_length);
122 // If prev_start == host_check_begin, then the host is the registry 109 while (curr_pos != host_check_begin - 1 &&
123 // itself, so return 0. 110 (tld_reverse_lookup.Advance(host[curr_pos]) || in_wildcard)) {
124 return (prev_start == host_check_begin) ? 0 111 // Check the return value whenever we're at the end of a label.
125 : (host.length() - prev_start); 112 if (curr_pos == host_check_begin || host[curr_pos - 1] == '.') {
113 int dafsa_result = tld_reverse_lookup.GetResultForCurrentSequence();
114 if (dafsa_result != kDafsaNotFound &&
115 ((dafsa_result & kDafsaPrivateRule) == 0 ||
116 private_filter == INCLUDE_PRIVATE_REGISTRIES)) {
117 if (dafsa_result & kDafsaExceptionRule) {
118 // Exception rules always win.
119 // TODO(nick): The old code talks about wildcards trumping when
120 // there's a subdomain. Are there unittests of that behavior?
ncarter (slow) 2017/01/26 00:33:02 This TODO is about a concern I have with equivalen
Ryan Sleevi 2017/01/26 00:41:45 Yeah, I'm trying to figure out from the PSL mainta
ncarter (slow) 2017/02/27 23:22:04 Shall I add a test for this case? It doesn't appea
121 size_t previous_dot = host.find('.', curr_pos);
122 if (previous_dot == std::string::npos) {
123 // If we get here, we had an exception rule with no dots (e.g.
124 // "!foo"). This would only be valid if we had a corresponding
125 // wildcard rule, which would have to be "*". But we explicitly
126 // disallow that case, so this kind of rule is invalid.
127 NOTREACHED() << "Invalid exception rule";
128 return 0;
129 }
130 DCHECK(in_wildcard);
131 DCHECK_EQ(previous_dot + 1, prevailing_rule_pos);
132 return host.length() - previous_dot - 1;
133 }
134 in_wildcard = (dafsa_result & kDafsaWildcardRule) != 0;
135 prevailing_rule_pos = curr_pos;
136 } else if (in_wildcard) {
137 in_wildcard = false;
138 prevailing_rule_pos = curr_pos;
126 } 139 }
140 }
141 --curr_pos;
142 }
143 // If the entire hostname is registry-controlled, fail.
144 if (in_wildcard || prevailing_rule_pos == host_check_begin)
145 return 0;
127 146
128 if (type & kDafsaExceptionRule) { 147 return host.length() - prevailing_rule_pos;
129 if (next_dot == std::string::npos) {
130 // If we get here, we had an exception rule with no dots (e.g.
131 // "!foo"). This would only be valid if we had a corresponding
132 // wildcard rule, which would have to be "*". But we explicitly
133 // disallow that case, so this kind of rule is invalid.
134 NOTREACHED() << "Invalid exception rule";
135 return 0;
136 }
137 return host.length() - next_dot - 1;
138 }
139
140 // If curr_start == host_check_begin, then the host is the registry
141 // itself, so return 0.
142 return (curr_start == host_check_begin) ? 0
143 : (host.length() - curr_start);
144 }
145
146 if (next_dot >= host_check_len) // Catches std::string::npos as well.
147 break;
148
149 prev_start = curr_start;
150 curr_start = next_dot + 1;
151 next_dot = host.find('.', curr_start);
152 }
153
154 // No rule found in the registry. curr_start now points to the first
155 // character of the last subcomponent of the host, so if we allow unknown
156 // registries, return the length of this subcomponent.
157 return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ?
158 (host.length() - curr_start) : 0;
159 } 148 }
160 149
161 base::StringPiece GetDomainAndRegistryImpl( 150 base::StringPiece GetDomainAndRegistryImpl(
162 base::StringPiece host, 151 base::StringPiece host,
163 PrivateRegistryFilter private_filter) { 152 PrivateRegistryFilter private_filter) {
164 DCHECK(!host.empty()); 153 DCHECK(!host.empty());
165 154
166 // Find the length of the registry for this host. 155 // Find the length of the registry for this host.
167 const size_t registry_length = 156 const size_t registry_length =
168 GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter); 157 GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter);
169 if ((registry_length == std::string::npos) || (registry_length == 0)) 158 if ((registry_length == std::string::npos) || (registry_length == 0))
170 return base::StringPiece(); // No registry. 159 return base::StringPiece(); // No registry.
171 // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding 160 // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding
172 // subcomponent length. 161 // subcomponent length.
173 DCHECK(host.length() >= 2); 162 DCHECK(host.length() >= 2);
174 if (registry_length > (host.length() - 2)) { 163 if (registry_length > (host.length() - 2)) {
175 NOTREACHED() << 164 NOTREACHED() <<
176 "Host does not have at least one subcomponent before registry!"; 165 "Host does not have at least one subcomponent before registry!";
177 return base::StringPiece(); 166 return base::StringPiece();
178 } 167 }
179 168
180 // Move past the dot preceding the registry, and search for the next previous 169 // Move past the dot preceding the registry, and search for the dot before
181 // dot. Return the host from after that dot, or the whole host when there is 170 // that. Return the host from after that dot, or the whole host when there is
182 // no dot. 171 // no dot.
183 const size_t dot = host.rfind('.', host.length() - registry_length - 2); 172 const size_t dot = host.rfind('.', host.length() - registry_length - 2);
184 if (dot == std::string::npos) 173 if (dot == std::string::npos)
185 return host; 174 return host;
186 return host.substr(dot + 1); 175 return host.substr(dot + 1);
187 } 176 }
188 177
189 // Same as GetDomainAndRegistry, but returns the domain and registry as a 178 // Same as GetDomainAndRegistry, but returns the domain and registry as a
190 // StringPiece that references the underlying string of the passed-in |gurl|. 179 // StringPiece that references the underlying string of the passed-in |gurl|.
191 // TODO(pkalinnikov): Eliminate this helper by exposing StringPiece as the 180 // TODO(pkalinnikov): Eliminate this helper by exposing StringPiece as the
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after
441 430
442 void SetFindDomainGraph(const unsigned char* domains, size_t length) { 431 void SetFindDomainGraph(const unsigned char* domains, size_t length) {
443 CHECK(domains); 432 CHECK(domains);
444 CHECK_NE(length, 0u); 433 CHECK_NE(length, 0u);
445 g_graph = domains; 434 g_graph = domains;
446 g_graph_length = length; 435 g_graph_length = length;
447 } 436 }
448 437
449 } // namespace registry_controlled_domains 438 } // namespace registry_controlled_domains
450 } // namespace net 439 } // namespace net
OLDNEW
« no previous file with comments | « net/base/registry_controlled_domains/BUILD.gn ('k') | net/tools/dafsa/make_dafsa.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698