OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <utility> | 8 #include <utility> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
205 bool is_tld_ascii = true; | 205 bool is_tld_ascii = true; |
206 size_t last_dot = host.rfind('.'); | 206 size_t last_dot = host.rfind('.'); |
207 if (last_dot != base::StringPiece::npos && | 207 if (last_dot != base::StringPiece::npos && |
208 host.substr(last_dot).starts_with(".xn--")) { | 208 host.substr(last_dot).starts_with(".xn--")) { |
209 is_tld_ascii = false; | 209 is_tld_ascii = false; |
210 } | 210 } |
211 | 211 |
212 // Do each component of the host separately, since we enforce script matching | 212 // Do each component of the host separately, since we enforce script matching |
213 // on a per-component basis. | 213 // on a per-component basis. |
214 base::string16 out16; | 214 base::string16 out16; |
| 215 bool has_idn_component = false; |
215 for (size_t component_start = 0, component_end; | 216 for (size_t component_start = 0, component_end; |
216 component_start < input16.length(); | 217 component_start < input16.length(); |
217 component_start = component_end + 1) { | 218 component_start = component_end + 1) { |
218 // Find the end of the component. | 219 // Find the end of the component. |
219 component_end = input16.find('.', component_start); | 220 component_end = input16.find('.', component_start); |
220 if (component_end == base::string16::npos) | 221 if (component_end == base::string16::npos) |
221 component_end = input16.length(); // For getting the last component. | 222 component_end = input16.length(); // For getting the last component. |
222 size_t component_length = component_end - component_start; | 223 size_t component_length = component_end - component_start; |
223 size_t new_component_start = out16.length(); | 224 size_t new_component_start = out16.length(); |
224 bool converted_idn = false; | 225 bool converted_idn = false; |
225 if (component_end > component_start) { | 226 if (component_end > component_start) { |
226 // Add the substring that we just found. | 227 // Add the substring that we just found. |
227 converted_idn = | 228 converted_idn = |
228 IDNToUnicodeOneComponent(input16.data() + component_start, | 229 IDNToUnicodeOneComponent(input16.data() + component_start, |
229 component_length, is_tld_ascii, &out16); | 230 component_length, is_tld_ascii, &out16); |
| 231 has_idn_component |= converted_idn; |
230 } | 232 } |
231 size_t new_component_length = out16.length() - new_component_start; | 233 size_t new_component_length = out16.length() - new_component_start; |
232 | 234 |
233 if (converted_idn && adjustments) { | 235 if (converted_idn && adjustments) { |
234 adjustments->push_back(base::OffsetAdjuster::Adjustment( | 236 adjustments->push_back(base::OffsetAdjuster::Adjustment( |
235 component_start, component_length, new_component_length)); | 237 component_start, component_length, new_component_length)); |
236 } | 238 } |
237 | 239 |
238 // Need to add the dot we just found (if we found one). | 240 // Need to add the dot we just found (if we found one). |
239 if (component_end < input16.length()) | 241 if (component_end < input16.length()) |
240 out16.push_back('.'); | 242 out16.push_back('.'); |
241 } | 243 } |
| 244 |
| 245 // Leave as punycode any inputs that spoof top domains. |
| 246 if (has_idn_component && |
| 247 g_idn_spoof_checker.Get().SimilarToTopDomains(out16)) { |
| 248 if (adjustments) |
| 249 adjustments->clear(); |
| 250 return input16; |
| 251 } |
| 252 |
242 return out16; | 253 return out16; |
243 } | 254 } |
244 | 255 |
245 // Returns true if the given Unicode host component is safe to display to the | 256 // Returns true if the given Unicode host component is safe to display to the |
246 // user. Note that this function does not deal with pure ASCII domain labels at | 257 // user. Note that this function does not deal with pure ASCII domain labels at |
247 // all even though it's possible to make up look-alike labels with ASCII | 258 // all even though it's possible to make up look-alike labels with ASCII |
248 // characters alone. | 259 // characters alone. |
249 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) { | 260 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) { |
250 return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label, is_tld_ascii); | 261 return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label, is_tld_ascii); |
251 } | 262 } |
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
566 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 577 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
567 ? text.substr(www.length()) : text; | 578 ? text.substr(www.length()) : text; |
568 } | 579 } |
569 | 580 |
570 base::string16 StripWWWFromHost(const GURL& url) { | 581 base::string16 StripWWWFromHost(const GURL& url) { |
571 DCHECK(url.is_valid()); | 582 DCHECK(url.is_valid()); |
572 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 583 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
573 } | 584 } |
574 | 585 |
575 } // namespace url_formatter | 586 } // namespace url_formatter |
OLD | NEW |