OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, | 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, |
6 // later modified by others), but almost entirely rewritten for Chrome. | 6 // later modified by others), but almost entirely rewritten for Chrome. |
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) | 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) |
8 /* ***** BEGIN LICENSE BLOCK ***** | 8 /* ***** BEGIN LICENSE BLOCK ***** |
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
10 * | 10 * |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
90 // to be included in the final returned length. | 90 // to be included in the final returned length. |
91 size_t host_check_len = host.length(); | 91 size_t host_check_len = host.length(); |
92 if (host[host_check_len - 1] == '.') { | 92 if (host[host_check_len - 1] == '.') { |
93 --host_check_len; | 93 --host_check_len; |
94 DCHECK(host_check_len > 0); // If this weren't true, the host would be ".", | 94 DCHECK(host_check_len > 0); // If this weren't true, the host would be ".", |
95 // and we'd have already returned above. | 95 // and we'd have already returned above. |
96 if (host[host_check_len - 1] == '.') | 96 if (host[host_check_len - 1] == '.') |
97 return 0; // Multiple trailing dots. | 97 return 0; // Multiple trailing dots. |
98 } | 98 } |
99 | 99 |
100 // Walk up the domain tree, most specific to least specific, | 100 // Feed |host| to the DAFSA in reverse character order. |
101 // looking for matches at each level. | 101 size_t curr_pos = host_check_len - 1; |
102 size_t prev_start = std::string::npos; | |
103 size_t curr_start = host_check_begin; | |
104 size_t next_dot = host.find('.', curr_start); | |
105 if (next_dot >= host_check_len) // Catches std::string::npos as well. | |
106 return 0; // This can't have a registry + domain. | |
107 while (1) { | |
108 const char* domain_str = host.data() + curr_start; | |
109 size_t domain_length = host_check_len - curr_start; | |
110 int type = LookupStringInFixedSet(g_graph, g_graph_length, domain_str, | |
111 domain_length); | |
112 bool do_check = type != kDafsaNotFound && | |
113 (!(type & kDafsaPrivateRule) || | |
114 private_filter == INCLUDE_PRIVATE_REGISTRIES); | |
115 | 102 |
116 // If the apparent match is a private registry and we're not including | 103 // If INCLUDE_UNKNOWN_REGISTRIES is true, we allow any initial label to be a |
117 // those, it can't be an actual match. | 104 // TLD. TODO(nick): is this right? Are there tests? What if a TLD appears |
118 if (do_check) { | 105 // as part of a longer rule only? |
Ryan Sleevi
2017/01/25 22:13:25
"If no rules match, the prevailing rule is '*'"
I
ncarter (slow)
2017/01/26 00:33:02
Thanks for the clarification of the mapping betwee
Ryan Sleevi
2017/01/26 00:41:45
If INCLUDE_UNKNOWN_REGISTRIES is set, we return "u
| |
119 // Exception rules override wildcard rules when the domain is an exact | 106 size_t in_wildcard = (unknown_filter == INCLUDE_UNKNOWN_REGISTRIES); |
120 // match, but wildcards take precedence when there's a subdomain. | 107 size_t prevailing_rule_pos = in_wildcard ? host_check_len + 1 : host.length(); |
121 if (type & kDafsaWildcardRule && (prev_start != std::string::npos)) { | 108 FixedSetIncrementalLookup tld_reverse_lookup(g_graph, g_graph_length); |
122 // If prev_start == host_check_begin, then the host is the registry | 109 while (curr_pos != host_check_begin - 1 && |
123 // itself, so return 0. | 110 (tld_reverse_lookup.Advance(host[curr_pos]) || in_wildcard)) { |
124 return (prev_start == host_check_begin) ? 0 | 111 // Check the return value whenever we're at the end of a label. |
125 : (host.length() - prev_start); | 112 if (curr_pos == host_check_begin || host[curr_pos - 1] == '.') { |
113 int dafsa_result = tld_reverse_lookup.GetResultForCurrentSequence(); | |
114 if (dafsa_result != kDafsaNotFound && | |
115 ((dafsa_result & kDafsaPrivateRule) == 0 || | |
116 private_filter == INCLUDE_PRIVATE_REGISTRIES)) { | |
117 if (dafsa_result & kDafsaExceptionRule) { | |
118 // Exception rules always win. | |
119 // TODO(nick): The old code talks about wildcards trumping when | |
120 // there's a subdomain. Are there unittests of that behavior? | |
ncarter (slow)
2017/01/26 00:33:02
This TODO is about a concern I have with equivalen
Ryan Sleevi
2017/01/26 00:41:45
Yeah, I'm trying to figure out from the PSL mainta
ncarter (slow)
2017/02/27 23:22:04
Shall I add a test for this case? It doesn't appea
| |
121 size_t previous_dot = host.find('.', curr_pos); | |
122 if (previous_dot == std::string::npos) { | |
123 // If we get here, we had an exception rule with no dots (e.g. | |
124 // "!foo"). This would only be valid if we had a corresponding | |
125 // wildcard rule, which would have to be "*". But we explicitly | |
126 // disallow that case, so this kind of rule is invalid. | |
127 NOTREACHED() << "Invalid exception rule"; | |
128 return 0; | |
129 } | |
130 DCHECK(in_wildcard); | |
131 DCHECK_EQ(previous_dot + 1, prevailing_rule_pos); | |
132 return host.length() - previous_dot - 1; | |
133 } | |
134 in_wildcard = (dafsa_result & kDafsaWildcardRule) != 0; | |
135 prevailing_rule_pos = curr_pos; | |
136 } else if (in_wildcard) { | |
137 in_wildcard = false; | |
138 prevailing_rule_pos = curr_pos; | |
126 } | 139 } |
140 } | |
141 --curr_pos; | |
142 } | |
143 // If the entire hostname is registry-controlled, fail. | |
144 if (in_wildcard || prevailing_rule_pos == host_check_begin) | |
145 return 0; | |
127 | 146 |
128 if (type & kDafsaExceptionRule) { | 147 return host.length() - prevailing_rule_pos; |
129 if (next_dot == std::string::npos) { | |
130 // If we get here, we had an exception rule with no dots (e.g. | |
131 // "!foo"). This would only be valid if we had a corresponding | |
132 // wildcard rule, which would have to be "*". But we explicitly | |
133 // disallow that case, so this kind of rule is invalid. | |
134 NOTREACHED() << "Invalid exception rule"; | |
135 return 0; | |
136 } | |
137 return host.length() - next_dot - 1; | |
138 } | |
139 | |
140 // If curr_start == host_check_begin, then the host is the registry | |
141 // itself, so return 0. | |
142 return (curr_start == host_check_begin) ? 0 | |
143 : (host.length() - curr_start); | |
144 } | |
145 | |
146 if (next_dot >= host_check_len) // Catches std::string::npos as well. | |
147 break; | |
148 | |
149 prev_start = curr_start; | |
150 curr_start = next_dot + 1; | |
151 next_dot = host.find('.', curr_start); | |
152 } | |
153 | |
154 // No rule found in the registry. curr_start now points to the first | |
155 // character of the last subcomponent of the host, so if we allow unknown | |
156 // registries, return the length of this subcomponent. | |
157 return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ? | |
158 (host.length() - curr_start) : 0; | |
159 } | 148 } |
160 | 149 |
161 base::StringPiece GetDomainAndRegistryImpl( | 150 base::StringPiece GetDomainAndRegistryImpl( |
162 base::StringPiece host, | 151 base::StringPiece host, |
163 PrivateRegistryFilter private_filter) { | 152 PrivateRegistryFilter private_filter) { |
164 DCHECK(!host.empty()); | 153 DCHECK(!host.empty()); |
165 | 154 |
166 // Find the length of the registry for this host. | 155 // Find the length of the registry for this host. |
167 const size_t registry_length = | 156 const size_t registry_length = |
168 GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter); | 157 GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter); |
169 if ((registry_length == std::string::npos) || (registry_length == 0)) | 158 if ((registry_length == std::string::npos) || (registry_length == 0)) |
170 return base::StringPiece(); // No registry. | 159 return base::StringPiece(); // No registry. |
171 // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding | 160 // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding |
172 // subcomponent length. | 161 // subcomponent length. |
173 DCHECK(host.length() >= 2); | 162 DCHECK(host.length() >= 2); |
174 if (registry_length > (host.length() - 2)) { | 163 if (registry_length > (host.length() - 2)) { |
175 NOTREACHED() << | 164 NOTREACHED() << |
176 "Host does not have at least one subcomponent before registry!"; | 165 "Host does not have at least one subcomponent before registry!"; |
177 return base::StringPiece(); | 166 return base::StringPiece(); |
178 } | 167 } |
179 | 168 |
180 // Move past the dot preceding the registry, and search for the next previous | 169 // Move past the dot preceding the registry, and search for the dot before |
181 // dot. Return the host from after that dot, or the whole host when there is | 170 // that. Return the host from after that dot, or the whole host when there is |
182 // no dot. | 171 // no dot. |
183 const size_t dot = host.rfind('.', host.length() - registry_length - 2); | 172 const size_t dot = host.rfind('.', host.length() - registry_length - 2); |
184 if (dot == std::string::npos) | 173 if (dot == std::string::npos) |
185 return host; | 174 return host; |
186 return host.substr(dot + 1); | 175 return host.substr(dot + 1); |
187 } | 176 } |
188 | 177 |
189 // Same as GetDomainAndRegistry, but returns the domain and registry as a | 178 // Same as GetDomainAndRegistry, but returns the domain and registry as a |
190 // StringPiece that references the underlying string of the passed-in |gurl|. | 179 // StringPiece that references the underlying string of the passed-in |gurl|. |
191 // TODO(pkalinnikov): Eliminate this helper by exposing StringPiece as the | 180 // TODO(pkalinnikov): Eliminate this helper by exposing StringPiece as the |
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
441 | 430 |
442 void SetFindDomainGraph(const unsigned char* domains, size_t length) { | 431 void SetFindDomainGraph(const unsigned char* domains, size_t length) { |
443 CHECK(domains); | 432 CHECK(domains); |
444 CHECK_NE(length, 0u); | 433 CHECK_NE(length, 0u); |
445 g_graph = domains; | 434 g_graph = domains; |
446 g_graph_length = length; | 435 g_graph_length = length; |
447 } | 436 } |
448 | 437 |
449 } // namespace registry_controlled_domains | 438 } // namespace registry_controlled_domains |
450 } // namespace net | 439 } // namespace net |
OLD | NEW |