OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, | 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, |
6 // later modified by others), but almost entirely rewritten for Chrome. | 6 // later modified by others), but almost entirely rewritten for Chrome. |
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) | 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) |
8 /* ***** BEGIN LICENSE BLOCK ***** | 8 /* ***** BEGIN LICENSE BLOCK ***** |
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
10 * | 10 * |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
67 const FindDomainPtr kDefaultFindDomainFunction = Perfect_Hash::FindDomain; | 67 const FindDomainPtr kDefaultFindDomainFunction = Perfect_Hash::FindDomain; |
68 | 68 |
69 // 'stringpool' is defined as a macro by the gperf-generated | 69 // 'stringpool' is defined as a macro by the gperf-generated |
70 // "effective_tld_names.cc". Provide a real constant value for it instead. | 70 // "effective_tld_names.cc". Provide a real constant value for it instead. |
71 const char* const kDefaultStringPool = stringpool; | 71 const char* const kDefaultStringPool = stringpool; |
72 #undef stringpool | 72 #undef stringpool |
73 | 73 |
74 FindDomainPtr g_find_domain_function = kDefaultFindDomainFunction; | 74 FindDomainPtr g_find_domain_function = kDefaultFindDomainFunction; |
75 const char* g_stringpool = kDefaultStringPool; | 75 const char* g_stringpool = kDefaultStringPool; |
76 | 76 |
77 size_t GetRegistryLengthImpl( | 77 size_t GetRegistryLengthImpl(const std::string& host, |
78 const std::string& host, | 78 UnknownRegistryFilter unknown_filter, |
79 UnknownRegistryFilter unknown_filter, | 79 PrivateRegistryFilter private_filter) { |
80 PrivateRegistryFilter private_filter) { | |
81 DCHECK(!host.empty()); | 80 DCHECK(!host.empty()); |
82 | 81 |
83 // Skip leading dots. | 82 // Skip leading dots. |
84 const size_t host_check_begin = host.find_first_not_of('.'); | 83 const size_t host_check_begin = host.find_first_not_of('.'); |
85 if (host_check_begin == std::string::npos) | 84 if (host_check_begin == std::string::npos) |
86 return 0; // Host is only dots. | 85 return 0; // Host is only dots. |
87 | 86 |
88 // A single trailing dot isn't relevant in this determination, but does need | 87 // A single trailing dot isn't relevant in this determination, but does need |
89 // to be included in the final returned length. | 88 // to be included in the final returned length. |
90 size_t host_check_len = host.length(); | 89 size_t host_check_len = host.length(); |
91 if (host[host_check_len - 1] == '.') { | 90 if (host[host_check_len - 1] == '.') { |
92 --host_check_len; | 91 --host_check_len; |
93 DCHECK(host_check_len > 0); // If this weren't true, the host would be ".", | 92 DCHECK(host_check_len > 0); // If this weren't true, the host would be ".", |
94 // and we'd have already returned above. | 93 // and we'd have already returned above. |
95 if (host[host_check_len - 1] == '.') | 94 if (host[host_check_len - 1] == '.') |
96 return 0; // Multiple trailing dots. | 95 return 0; // Multiple trailing dots. |
97 } | 96 } |
98 | 97 |
99 // Walk up the domain tree, most specific to least specific, | 98 // Walk up the domain tree, most specific to least specific, |
100 // looking for matches at each level. | 99 // looking for matches at each level. |
101 size_t prev_start = std::string::npos; | 100 size_t prev_start = std::string::npos; |
102 size_t curr_start = host_check_begin; | 101 size_t curr_start = host_check_begin; |
103 size_t next_dot = host.find('.', curr_start); | 102 size_t next_dot = host.find('.', curr_start); |
104 if (next_dot >= host_check_len) // Catches std::string::npos as well. | 103 if (next_dot >= host_check_len) // Catches std::string::npos as well. |
105 return 0; // This can't have a registry + domain. | 104 return 0; // This can't have a registry + domain. |
106 while (1) { | 105 while (1) { |
107 const char* domain_str = host.data() + curr_start; | 106 const char* domain_str = host.data() + curr_start; |
108 int domain_length = host_check_len - curr_start; | 107 int domain_length = host_check_len - curr_start; |
109 const DomainRule* rule = g_find_domain_function(domain_str, domain_length); | 108 const DomainRule* rule = g_find_domain_function(domain_str, domain_length); |
110 | 109 |
111 // We need to compare the string after finding a match because the | 110 // We need to compare the string after finding a match because the |
112 // no-collisions of perfect hashing only refers to items in the set. Since | 111 // no-collisions of perfect hashing only refers to items in the set. Since |
113 // we're searching for arbitrary domains, there could be collisions. | 112 // we're searching for arbitrary domains, there could be collisions. |
114 // Furthermore, if the apparent match is a private registry and we're not | 113 // Furthermore, if the apparent match is a private registry and we're not |
115 // including those, it can't be an actual match. | 114 // including those, it can't be an actual match. |
116 if (rule) { | 115 if (rule) { |
117 bool do_check = !(rule->type & kPrivateRule) || | 116 bool do_check = !(rule->type & kPrivateRule) || |
118 private_filter == INCLUDE_PRIVATE_REGISTRIES; | 117 private_filter == INCLUDE_PRIVATE_REGISTRIES; |
119 if (do_check && base::strncasecmp(domain_str, | 118 if (do_check && |
120 g_stringpool + rule->name_offset, | 119 base::strncasecmp(domain_str, |
121 domain_length) == 0) { | 120 g_stringpool + rule->name_offset, |
| 121 domain_length) == 0) { |
122 // Exception rules override wildcard rules when the domain is an exact | 122 // Exception rules override wildcard rules when the domain is an exact |
123 // match, but wildcards take precedence when there's a subdomain. | 123 // match, but wildcards take precedence when there's a subdomain. |
124 if (rule->type & kWildcardRule && (prev_start != std::string::npos)) { | 124 if (rule->type & kWildcardRule && (prev_start != std::string::npos)) { |
125 // If prev_start == host_check_begin, then the host is the registry | 125 // If prev_start == host_check_begin, then the host is the registry |
126 // itself, so return 0. | 126 // itself, so return 0. |
127 return (prev_start == host_check_begin) ? | 127 return (prev_start == host_check_begin) |
128 0 : (host.length() - prev_start); | 128 ? 0 |
| 129 : (host.length() - prev_start); |
129 } | 130 } |
130 | 131 |
131 if (rule->type & kExceptionRule) { | 132 if (rule->type & kExceptionRule) { |
132 if (next_dot == std::string::npos) { | 133 if (next_dot == std::string::npos) { |
133 // If we get here, we had an exception rule with no dots (e.g. | 134 // If we get here, we had an exception rule with no dots (e.g. |
134 // "!foo"). This would only be valid if we had a corresponding | 135 // "!foo"). This would only be valid if we had a corresponding |
135 // wildcard rule, which would have to be "*". But we explicitly | 136 // wildcard rule, which would have to be "*". But we explicitly |
136 // disallow that case, so this kind of rule is invalid. | 137 // disallow that case, so this kind of rule is invalid. |
137 NOTREACHED() << "Invalid exception rule"; | 138 NOTREACHED() << "Invalid exception rule"; |
138 return 0; | 139 return 0; |
139 } | 140 } |
140 return host.length() - next_dot - 1; | 141 return host.length() - next_dot - 1; |
141 } | 142 } |
142 | 143 |
143 // If curr_start == host_check_begin, then the host is the registry | 144 // If curr_start == host_check_begin, then the host is the registry |
144 // itself, so return 0. | 145 // itself, so return 0. |
145 return (curr_start == host_check_begin) ? | 146 return (curr_start == host_check_begin) ? 0 |
146 0 : (host.length() - curr_start); | 147 : (host.length() - curr_start); |
147 } | 148 } |
148 } | 149 } |
149 | 150 |
150 if (next_dot >= host_check_len) // Catches std::string::npos as well. | 151 if (next_dot >= host_check_len) // Catches std::string::npos as well. |
151 break; | 152 break; |
152 | 153 |
153 prev_start = curr_start; | 154 prev_start = curr_start; |
154 curr_start = next_dot + 1; | 155 curr_start = next_dot + 1; |
155 next_dot = host.find('.', curr_start); | 156 next_dot = host.find('.', curr_start); |
156 } | 157 } |
157 | 158 |
158 // No rule found in the registry. curr_start now points to the first | 159 // No rule found in the registry. curr_start now points to the first |
159 // character of the last subcomponent of the host, so if we allow unknown | 160 // character of the last subcomponent of the host, so if we allow unknown |
160 // registries, return the length of this subcomponent. | 161 // registries, return the length of this subcomponent. |
161 return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ? | 162 return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES |
162 (host.length() - curr_start) : 0; | 163 ? (host.length() - curr_start) |
| 164 : 0; |
163 } | 165 } |
164 | 166 |
165 std::string GetDomainAndRegistryImpl( | 167 std::string GetDomainAndRegistryImpl(const std::string& host, |
166 const std::string& host, PrivateRegistryFilter private_filter) { | 168 PrivateRegistryFilter private_filter) { |
167 DCHECK(!host.empty()); | 169 DCHECK(!host.empty()); |
168 | 170 |
169 // Find the length of the registry for this host. | 171 // Find the length of the registry for this host. |
170 const size_t registry_length = | 172 const size_t registry_length = |
171 GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter); | 173 GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter); |
172 if ((registry_length == std::string::npos) || (registry_length == 0)) | 174 if ((registry_length == std::string::npos) || (registry_length == 0)) |
173 return std::string(); // No registry. | 175 return std::string(); // No registry. |
174 // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding | 176 // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding |
175 // subcomponent length. | 177 // subcomponent length. |
176 DCHECK(host.length() >= 2); | 178 DCHECK(host.length() >= 2); |
177 if (registry_length > (host.length() - 2)) { | 179 if (registry_length > (host.length() - 2)) { |
178 NOTREACHED() << | 180 NOTREACHED() |
179 "Host does not have at least one subcomponent before registry!"; | 181 << "Host does not have at least one subcomponent before registry!"; |
180 return std::string(); | 182 return std::string(); |
181 } | 183 } |
182 | 184 |
183 // Move past the dot preceding the registry, and search for the next previous | 185 // Move past the dot preceding the registry, and search for the next previous |
184 // dot. Return the host from after that dot, or the whole host when there is | 186 // dot. Return the host from after that dot, or the whole host when there is |
185 // no dot. | 187 // no dot. |
186 const size_t dot = host.rfind('.', host.length() - registry_length - 2); | 188 const size_t dot = host.rfind('.', host.length() - registry_length - 2); |
187 if (dot == std::string::npos) | 189 if (dot == std::string::npos) |
188 return host; | 190 return host; |
189 return host.substr(dot + 1); | 191 return host.substr(dot + 1); |
190 } | 192 } |
191 | 193 |
192 } // namespace | 194 } // namespace |
193 | 195 |
194 std::string GetDomainAndRegistry( | 196 std::string GetDomainAndRegistry(const GURL& gurl, |
195 const GURL& gurl, | 197 PrivateRegistryFilter filter) { |
196 PrivateRegistryFilter filter) { | |
197 const url::Component host = gurl.parsed_for_possibly_invalid_spec().host; | 198 const url::Component host = gurl.parsed_for_possibly_invalid_spec().host; |
198 if ((host.len <= 0) || gurl.HostIsIPAddress()) | 199 if ((host.len <= 0) || gurl.HostIsIPAddress()) |
199 return std::string(); | 200 return std::string(); |
200 return GetDomainAndRegistryImpl(std::string( | 201 return GetDomainAndRegistryImpl( |
201 gurl.possibly_invalid_spec().data() + host.begin, host.len), filter); | 202 std::string(gurl.possibly_invalid_spec().data() + host.begin, host.len), |
| 203 filter); |
202 } | 204 } |
203 | 205 |
204 std::string GetDomainAndRegistry( | 206 std::string GetDomainAndRegistry(const std::string& host, |
205 const std::string& host, | 207 PrivateRegistryFilter filter) { |
206 PrivateRegistryFilter filter) { | |
207 url::CanonHostInfo host_info; | 208 url::CanonHostInfo host_info; |
208 const std::string canon_host(CanonicalizeHost(host, &host_info)); | 209 const std::string canon_host(CanonicalizeHost(host, &host_info)); |
209 if (canon_host.empty() || host_info.IsIPAddress()) | 210 if (canon_host.empty() || host_info.IsIPAddress()) |
210 return std::string(); | 211 return std::string(); |
211 return GetDomainAndRegistryImpl(canon_host, filter); | 212 return GetDomainAndRegistryImpl(canon_host, filter); |
212 } | 213 } |
213 | 214 |
214 bool SameDomainOrHost( | 215 bool SameDomainOrHost(const GURL& gurl1, |
215 const GURL& gurl1, | 216 const GURL& gurl2, |
216 const GURL& gurl2, | 217 PrivateRegistryFilter filter) { |
217 PrivateRegistryFilter filter) { | |
218 // See if both URLs have a known domain + registry, and those values are the | 218 // See if both URLs have a known domain + registry, and those values are the |
219 // same. | 219 // same. |
220 const std::string domain1(GetDomainAndRegistry(gurl1, filter)); | 220 const std::string domain1(GetDomainAndRegistry(gurl1, filter)); |
221 const std::string domain2(GetDomainAndRegistry(gurl2, filter)); | 221 const std::string domain2(GetDomainAndRegistry(gurl2, filter)); |
222 if (!domain1.empty() || !domain2.empty()) | 222 if (!domain1.empty() || !domain2.empty()) |
223 return domain1 == domain2; | 223 return domain1 == domain2; |
224 | 224 |
225 // No domains. See if the hosts are identical. | 225 // No domains. See if the hosts are identical. |
226 const url::Component host1 = gurl1.parsed_for_possibly_invalid_spec().host; | 226 const url::Component host1 = gurl1.parsed_for_possibly_invalid_spec().host; |
227 const url::Component host2 = gurl2.parsed_for_possibly_invalid_spec().host; | 227 const url::Component host2 = gurl2.parsed_for_possibly_invalid_spec().host; |
228 if ((host1.len <= 0) || (host1.len != host2.len)) | 228 if ((host1.len <= 0) || (host1.len != host2.len)) |
229 return false; | 229 return false; |
230 return !strncmp(gurl1.possibly_invalid_spec().data() + host1.begin, | 230 return !strncmp(gurl1.possibly_invalid_spec().data() + host1.begin, |
231 gurl2.possibly_invalid_spec().data() + host2.begin, | 231 gurl2.possibly_invalid_spec().data() + host2.begin, |
232 host1.len); | 232 host1.len); |
233 } | 233 } |
234 | 234 |
235 size_t GetRegistryLength( | 235 size_t GetRegistryLength(const GURL& gurl, |
236 const GURL& gurl, | 236 UnknownRegistryFilter unknown_filter, |
237 UnknownRegistryFilter unknown_filter, | 237 PrivateRegistryFilter private_filter) { |
238 PrivateRegistryFilter private_filter) { | |
239 const url::Component host = gurl.parsed_for_possibly_invalid_spec().host; | 238 const url::Component host = gurl.parsed_for_possibly_invalid_spec().host; |
240 if (host.len <= 0) | 239 if (host.len <= 0) |
241 return std::string::npos; | 240 return std::string::npos; |
242 if (gurl.HostIsIPAddress()) | 241 if (gurl.HostIsIPAddress()) |
243 return 0; | 242 return 0; |
244 return GetRegistryLengthImpl( | 243 return GetRegistryLengthImpl( |
245 std::string(gurl.possibly_invalid_spec().data() + host.begin, host.len), | 244 std::string(gurl.possibly_invalid_spec().data() + host.begin, host.len), |
246 unknown_filter, | 245 unknown_filter, |
247 private_filter); | 246 private_filter); |
248 } | 247 } |
249 | 248 |
250 size_t GetRegistryLength( | 249 size_t GetRegistryLength(const std::string& host, |
251 const std::string& host, | 250 UnknownRegistryFilter unknown_filter, |
252 UnknownRegistryFilter unknown_filter, | 251 PrivateRegistryFilter private_filter) { |
253 PrivateRegistryFilter private_filter) { | |
254 url::CanonHostInfo host_info; | 252 url::CanonHostInfo host_info; |
255 const std::string canon_host(CanonicalizeHost(host, &host_info)); | 253 const std::string canon_host(CanonicalizeHost(host, &host_info)); |
256 if (canon_host.empty()) | 254 if (canon_host.empty()) |
257 return std::string::npos; | 255 return std::string::npos; |
258 if (host_info.IsIPAddress()) | 256 if (host_info.IsIPAddress()) |
259 return 0; | 257 return 0; |
260 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); | 258 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); |
261 } | 259 } |
262 | 260 |
263 void SetFindDomainFunctionAndStringPoolForTesting(FindDomainPtr function, | 261 void SetFindDomainFunctionAndStringPoolForTesting(FindDomainPtr function, |
264 const char* stringpool) { | 262 const char* stringpool) { |
265 g_find_domain_function = function ? function : kDefaultFindDomainFunction; | 263 g_find_domain_function = function ? function : kDefaultFindDomainFunction; |
266 g_stringpool = stringpool ? stringpool : kDefaultStringPool; | 264 g_stringpool = stringpool ? stringpool : kDefaultStringPool; |
267 } | 265 } |
268 | 266 |
269 } // namespace registry_controlled_domains | 267 } // namespace registry_controlled_domains |
270 } // namespace net | 268 } // namespace net |
OLD | NEW |