OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <utility> | 8 #include <utility> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
205 bool is_tld_ascii = true; | 205 bool is_tld_ascii = true; |
206 size_t last_dot = host.rfind('.'); | 206 size_t last_dot = host.rfind('.'); |
207 if (last_dot != base::StringPiece::npos && | 207 if (last_dot != base::StringPiece::npos && |
208 host.substr(last_dot).starts_with(".xn--")) { | 208 host.substr(last_dot).starts_with(".xn--")) { |
209 is_tld_ascii = false; | 209 is_tld_ascii = false; |
210 } | 210 } |
211 | 211 |
212 // Do each component of the host separately, since we enforce script matching | 212 // Do each component of the host separately, since we enforce script matching |
213 // on a per-component basis. | 213 // on a per-component basis. |
214 base::string16 out16; | 214 base::string16 out16; |
215 bool has_idn_component = false; | |
216 for (size_t component_start = 0, component_end; | 215 for (size_t component_start = 0, component_end; |
217 component_start < input16.length(); | 216 component_start < input16.length(); |
218 component_start = component_end + 1) { | 217 component_start = component_end + 1) { |
219 // Find the end of the component. | 218 // Find the end of the component. |
220 component_end = input16.find('.', component_start); | 219 component_end = input16.find('.', component_start); |
221 if (component_end == base::string16::npos) | 220 if (component_end == base::string16::npos) |
222 component_end = input16.length(); // For getting the last component. | 221 component_end = input16.length(); // For getting the last component. |
223 size_t component_length = component_end - component_start; | 222 size_t component_length = component_end - component_start; |
224 size_t new_component_start = out16.length(); | 223 size_t new_component_start = out16.length(); |
225 bool converted_idn = false; | 224 bool converted_idn = false; |
226 if (component_end > component_start) { | 225 if (component_end > component_start) { |
227 // Add the substring that we just found. | 226 // Add the substring that we just found. |
228 converted_idn = | 227 converted_idn = |
229 IDNToUnicodeOneComponent(input16.data() + component_start, | 228 IDNToUnicodeOneComponent(input16.data() + component_start, |
230 component_length, is_tld_ascii, &out16); | 229 component_length, is_tld_ascii, &out16); |
231 has_idn_component |= converted_idn; | |
232 } | 230 } |
233 size_t new_component_length = out16.length() - new_component_start; | 231 size_t new_component_length = out16.length() - new_component_start; |
234 | 232 |
235 if (converted_idn && adjustments) { | 233 if (converted_idn && adjustments) { |
236 adjustments->push_back(base::OffsetAdjuster::Adjustment( | 234 adjustments->push_back(base::OffsetAdjuster::Adjustment( |
237 component_start, component_length, new_component_length)); | 235 component_start, component_length, new_component_length)); |
238 } | 236 } |
239 | 237 |
240 // Need to add the dot we just found (if we found one). | 238 // Need to add the dot we just found (if we found one). |
241 if (component_end < input16.length()) | 239 if (component_end < input16.length()) |
242 out16.push_back('.'); | 240 out16.push_back('.'); |
243 } | 241 } |
244 | |
245 // Leave as punycode any inputs that spoof top domains. | |
246 if (has_idn_component && | |
247 g_idn_spoof_checker.Get().SimilarToTopDomains(out16)) { | |
248 if (adjustments) | |
249 adjustments->clear(); | |
250 return input16; | |
251 } | |
252 | |
253 return out16; | 242 return out16; |
254 } | 243 } |
255 | 244 |
256 // Returns true if the given Unicode host component is safe to display to the | 245 // Returns true if the given Unicode host component is safe to display to the |
257 // user. Note that this function does not deal with pure ASCII domain labels at | 246 // user. Note that this function does not deal with pure ASCII domain labels at |
258 // all even though it's possible to make up look-alike labels with ASCII | 247 // all even though it's possible to make up look-alike labels with ASCII |
259 // characters alone. | 248 // characters alone. |
260 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) { | 249 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) { |
261 return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label, is_tld_ascii); | 250 return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label, is_tld_ascii); |
262 } | 251 } |
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
577 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 566 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
578 ? text.substr(www.length()) : text; | 567 ? text.substr(www.length()) : text; |
579 } | 568 } |
580 | 569 |
581 base::string16 StripWWWFromHost(const GURL& url) { | 570 base::string16 StripWWWFromHost(const GURL& url) { |
582 DCHECK(url.is_valid()); | 571 DCHECK(url.is_valid()); |
583 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 572 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
584 } | 573 } |
585 | 574 |
586 } // namespace url_formatter | 575 } // namespace url_formatter |
OLD | NEW |