components/url_formatter/url_formatter.cc - Issue 2683793010: Block domain labels made of Cyrillic letters that look alike Latin

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2683793010: Block domain labels made of Cyrillic letters that look alike Latin (Closed)

Patch Set: go back to ps11 Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/url_formatter/url_formatter.h"	5 #include "components/url_formatter/url_formatter.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <utility>	8 #include <utility>

9	9

10 #include "base/lazy_instance.h"	10 #include "base/lazy_instance.h"

11 #include "base/macros.h"	11 #include "base/macros.h"

12 #include "base/numerics/safe_conversions.h"	12 #include "base/numerics/safe_conversions.h"

13 #include "base/strings/string_piece.h"	13 #include "base/strings/string_piece.h"

14 #include "base/strings/string_util.h"	14 #include "base/strings/string_util.h"

15 #include "base/strings/utf_offset_string_conversions.h"	15 #include "base/strings/utf_offset_string_conversions.h"

16 #include "base/strings/utf_string_conversions.h"	16 #include "base/strings/utf_string_conversions.h"

17 #include "base/threading/thread_local_storage.h"	17 #include "base/threading/thread_local_storage.h"

	18 #include "third_party/icu/source/common/unicode/schriter.h"

18 #include "third_party/icu/source/common/unicode/uidna.h"	19 #include "third_party/icu/source/common/unicode/uidna.h"

19 #include "third_party/icu/source/common/unicode/uniset.h"	20 #include "third_party/icu/source/common/unicode/uniset.h"

20 #include "third_party/icu/source/common/unicode/uscript.h"	21 #include "third_party/icu/source/common/unicode/uscript.h"

21 #include "third_party/icu/source/common/unicode/uvernum.h"	22 #include "third_party/icu/source/common/unicode/uvernum.h"

22 #include "third_party/icu/source/i18n/unicode/regex.h"	23 #include "third_party/icu/source/i18n/unicode/regex.h"

23 #include "third_party/icu/source/i18n/unicode/uspoof.h"	24 #include "third_party/icu/source/i18n/unicode/uspoof.h"

24 #include "url/gurl.h"	25 #include "url/gurl.h"

25 #include "url/third_party/mozilla/url_parse.h"	26 #include "url/third_party/mozilla/url_parse.h"

26	27

27 namespace url_formatter {	28 namespace url_formatter {

28	29

29 namespace {	30 namespace {

30	31

31 base::string16 IDNToUnicodeWithAdjustments(	32 base::string16 IDNToUnicodeWithAdjustments(

32 base::StringPiece host,	33 base::StringPiece host,

33 base::OffsetAdjuster::Adjustments* adjustments);	34 base::OffsetAdjuster::Adjustments* adjustments);

34 bool IDNToUnicodeOneComponent(const base::char16* comp,	35 bool IDNToUnicodeOneComponent(const base::char16* comp,

35 size_t comp_len,	36 size_t comp_len,

	37 bool is_tld_ascii,

36 base::string16* out);	38 base::string16* out);

37	39

38 class AppendComponentTransform {	40 class AppendComponentTransform {

39 public:	41 public:

40 AppendComponentTransform() {}	42 AppendComponentTransform() {}

41 virtual ~AppendComponentTransform() {}	43 virtual ~AppendComponentTransform() {}

42	44

43 virtual base::string16 Execute(	45 virtual base::string16 Execute(

44 const std::string& component_text,	46 const std::string& component_text,

45 base::OffsetAdjuster::Adjustments* adjustments) const = 0;	47 base::OffsetAdjuster::Adjustments* adjustments) const = 0;

(...skipping 147 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
193 // allow unicode UNC hostnames regardless of encodings.	195 // allow unicode UNC hostnames regardless of encodings.

194 base::string16 IDNToUnicodeWithAdjustments(	196 base::string16 IDNToUnicodeWithAdjustments(

195 base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) {	197 base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) {

196 if (adjustments)	198 if (adjustments)

197 adjustments->clear();	199 adjustments->clear();

198 // Convert the ASCII input to a base::string16 for ICU.	200 // Convert the ASCII input to a base::string16 for ICU.

199 base::string16 input16;	201 base::string16 input16;

200 input16.reserve(host.length());	202 input16.reserve(host.length());

201 input16.insert(input16.end(), host.begin(), host.end());	203 input16.insert(input16.end(), host.begin(), host.end());

202	204

	205 bool is_tld_ascii = true;

	206 size_t last_dot = host.rfind('.');

	207 if (last_dot != base::StringPiece::npos &&

	208 host.substr(last_dot).starts_with(".xn--")) {

	209 is_tld_ascii = false;

	210 }

	211

203 // Do each component of the host separately, since we enforce script matching	212 // Do each component of the host separately, since we enforce script matching

204 // on a per-component basis.	213 // on a per-component basis.

205 base::string16 out16;	214 base::string16 out16;

206 for (size_t component_start = 0, component_end;	215 for (size_t component_start = 0, component_end;

207 component_start < input16.length();	216 component_start < input16.length();

208 component_start = component_end + 1) {	217 component_start = component_end + 1) {

209 // Find the end of the component.	218 // Find the end of the component.

210 component_end = input16.find('.', component_start);	219 component_end = input16.find('.', component_start);

211 if (component_end == base::string16::npos)	220 if (component_end == base::string16::npos)

212 component_end = input16.length(); // For getting the last component.	221 component_end = input16.length(); // For getting the last component.

213 size_t component_length = component_end - component_start;	222 size_t component_length = component_end - component_start;

214 size_t new_component_start = out16.length();	223 size_t new_component_start = out16.length();

215 bool converted_idn = false;	224 bool converted_idn = false;

216 if (component_end > component_start) {	225 if (component_end > component_start) {

217 // Add the substring that we just found.	226 // Add the substring that we just found.

218 converted_idn =	227 converted_idn =

219 IDNToUnicodeOneComponent(input16.data() + component_start,	228 IDNToUnicodeOneComponent(input16.data() + component_start,

220 component_length, &out16);	229 component_length, is_tld_ascii, &out16);

221 }	230 }

222 size_t new_component_length = out16.length() - new_component_start;	231 size_t new_component_length = out16.length() - new_component_start;

223	232

224 if (converted_idn && adjustments) {	233 if (converted_idn && adjustments) {

225 adjustments->push_back(base::OffsetAdjuster::Adjustment(	234 adjustments->push_back(base::OffsetAdjuster::Adjustment(

226 component_start, component_length, new_component_length));	235 component_start, component_length, new_component_length));

227 }	236 }

228	237

229 // Need to add the dot we just found (if we found one).	238 // Need to add the dot we just found (if we found one).

230 if (component_end < input16.length())	239 if (component_end < input16.length())

231 out16.push_back('.');	240 out16.push_back('.');

232 }	241 }

233 return out16;	242 return out16;

234 }	243 }

235	244

236 // A helper class for IDN Spoof checking, used to ensure that no IDN input is	245 // A helper class for IDN Spoof checking, used to ensure that no IDN input is

237 // spoofable per Chromium's standard of spoofability. For a more thorough	246 // spoofable per Chromium's standard of spoofability. For a more thorough

238 // explanation of how spoof checking works in Chromium, see	247 // explanation of how spoof checking works in Chromium, see

239 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome .	248 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome .

240 class IDNSpoofChecker {	249 class IDNSpoofChecker {

241 public:	250 public:

242 IDNSpoofChecker();	251 IDNSpoofChecker();

243	252

244 // Returns true if \|label\| is safe to display as Unicode. In the event of	253 // Returns true if \|label\| is safe to display as Unicode. When the TLD is

245 // library failure, all IDN inputs will be treated as unsafe.	254 // ASCII, check if a label is entirely made of Cyrillic letters that look like

246 bool Check(base::StringPiece16 label);	255 // Latin letters. In the event of library failure, all IDN inputs will be

	256 // treated as unsafe.

	257 bool Check(base::StringPiece16 label, bool is_tld_ascii);

247	258

248 private:	259 private:

249 void SetAllowedUnicodeSet(UErrorCode* status);	260 void SetAllowedUnicodeSet(UErrorCode* status);

	261 bool IsMadeOfLatinAlikeCyrillic(const icu::UnicodeString& label_string);

250	262

251 USpoofChecker* checker_;	263 USpoofChecker* checker_;

252 icu::UnicodeSet deviation_characters_;	264 icu::UnicodeSet deviation_characters_;

253 icu::UnicodeSet non_ascii_latin_letters_;	265 icu::UnicodeSet non_ascii_latin_letters_;

254 icu::UnicodeSet kana_letters_exceptions_;	266 icu::UnicodeSet kana_letters_exceptions_;

	267 icu::UnicodeSet cyrillic_letters_;

	268 icu::UnicodeSet cyrillic_letters_latin_alike_;

255	269

256 DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker);	270 DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker);

257 };	271 };

258	272

259 base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =	273 base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =

260 LAZY_INSTANCE_INITIALIZER;	274 LAZY_INSTANCE_INITIALIZER;

261 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER;	275 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER;

262	276

263 void OnThreadTermination(void* regex_matcher) {	277 void OnThreadTermination(void* regex_matcher) {

264 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher);	278 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher);

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
307 // the allowed set.	321 // the allowed set.

308 non_ascii_latin_letters_ = icu::UnicodeSet(	322 non_ascii_latin_letters_ = icu::UnicodeSet(

309 UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status);	323 UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status);

310 non_ascii_latin_letters_.freeze();	324 non_ascii_latin_letters_.freeze();

311	325

312 // These letters are parts of \|dangerous_patterns_\|.	326 // These letters are parts of \|dangerous_patterns_\|.

313 kana_letters_exceptions_ = icu::UnicodeSet(UNICODE_STRING_SIMPLE(	327 kana_letters_exceptions_ = icu::UnicodeSet(UNICODE_STRING_SIMPLE(

314 "[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb\\u30fc]"), status);	328 "[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb\\u30fc]"), status);

315 kana_letters_exceptions_.freeze();	329 kana_letters_exceptions_.freeze();

316	330

	331 // These Cyrillic letters look like Latin. A domain label entirely made of

	332 // these letters is blocked as a simplified whole-script-spoofable.

	333 cyrillic_letters_latin_alike_ =

	334 icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status);

	335 cyrillic_letters_latin_alike_.freeze();

	336

	337 cyrillic_letters_ =

	338 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Cyrl:]]"), status);

	339 cyrillic_letters_.freeze();

	340

317 DCHECK(U_SUCCESS(status));	341 DCHECK(U_SUCCESS(status));

318 }	342 }

319	343

320 bool IDNSpoofChecker::Check(base::StringPiece16 label) {	344 bool IDNSpoofChecker::Check(base::StringPiece16 label, bool is_tld_ascii) {

321 UErrorCode status = U_ZERO_ERROR;	345 UErrorCode status = U_ZERO_ERROR;

322 int32_t result = uspoof_check(checker_, label.data(),	346 int32_t result = uspoof_check(checker_, label.data(),

323 base::checked_cast<int32_t>(label.size()),	347 base::checked_cast<int32_t>(label.size()),

324 NULL, &status);	348 NULL, &status);

325 // If uspoof_check fails (due to library failure), or if any of the checks	349 // If uspoof_check fails (due to library failure), or if any of the checks

326 // fail, treat the IDN as unsafe.	350 // fail, treat the IDN as unsafe.

327 if (U_FAILURE(status) \|\| (result & USPOOF_ALL_CHECKS))	351 if (U_FAILURE(status) \|\| (result & USPOOF_ALL_CHECKS))

328 return false;	352 return false;

329	353

330 icu::UnicodeString label_string(FALSE, label.data(),	354 icu::UnicodeString label_string(FALSE, label.data(),

331 base::checked_cast<int32_t>(label.size()));	355 base::checked_cast<int32_t>(label.size()));

332	356

333 // A punycode label with 'xn--' prefix is not subject to the URL	357 // A punycode label with 'xn--' prefix is not subject to the URL

334 // canonicalization and is stored as it is in GURL. If it encodes a deviation	358 // canonicalization and is stored as it is in GURL. If it encodes a deviation

335 // character (UTS 46; e.g. U+00DF/sharp-s), it should be still shown in	359 // character (UTS 46; e.g. U+00DF/sharp-s), it should be still shown in

336 // punycode instead of Unicode. Without this check, xn--fu-hia for	360 // punycode instead of Unicode. Without this check, xn--fu-hia for

337 // 'fu<sharp-s>' would be converted to 'fu<sharp-s>' for display because	361 // 'fu<sharp-s>' would be converted to 'fu<sharp-s>' for display because

338 // "UTS 46 section 4 Processing step 4" applies validity criteria for	362 // "UTS 46 section 4 Processing step 4" applies validity criteria for

339 // non-transitional processing (i.e. do not map deviation characters) to any	363 // non-transitional processing (i.e. do not map deviation characters) to any

340 // punycode labels regardless of whether transitional or non-transitional is	364 // punycode labels regardless of whether transitional or non-transitional is

341 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted	365 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted

342 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as	366 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as

343 // such. See http://crbug.com/595263 .	367 // such. See http://crbug.com/595263 .

344 if (deviation_characters_.containsSome(label_string))	368 if (deviation_characters_.containsSome(label_string))

345 return false;	369 return false;

346	370

347 // If there's no script mixing, the input is regarded as safe without any	371 // If there's no script mixing, the input is regarded as safe without any

348 // extra check unless it contains Kana letter exceptions. Note that	372 // extra check unless it contains Kana letter exceptions or it's made entirely

349 // the following combinations of scripts are treated as a 'logical' single	373 // of Cyrillic letters that look like Latin letters. Note that the following

350 // script.	374 // combinations of scripts are treated as a 'logical' single script.

351 // - Chinese: Han, Bopomofo, Common	375 // - Chinese: Han, Bopomofo, Common

352 // - Japanese: Han, Hiragana, Katakana, Common	376 // - Japanese: Han, Hiragana, Katakana, Common

353 // - Korean: Hangul, Han, Common	377 // - Korean: Hangul, Han, Common

354 result &= USPOOF_RESTRICTION_LEVEL_MASK;	378 result &= USPOOF_RESTRICTION_LEVEL_MASK;

355 if (result == USPOOF_ASCII \|\|	379 if (result == USPOOF_ASCII) return true;

356 (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&	380 if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&

357 kana_letters_exceptions_.containsNone(label_string)))	381 kana_letters_exceptions_.containsNone(label_string)) {

358 return true;	382 // Check Cyrillic confusable only for ASCII TLDs.

	383 return !is_tld_ascii \|\| !IsMadeOfLatinAlikeCyrillic(label_string);

	384 }

359	385

360 // Additional checks for \|label\| with multiple scripts, one of which is Latin.	386 // Additional checks for \|label\| with multiple scripts, one of which is Latin.

361 // Disallow non-ASCII Latin letters to mix with a non-Latin script.	387 // Disallow non-ASCII Latin letters to mix with a non-Latin script.

362 if (non_ascii_latin_letters_.containsSome(label_string))	388 if (non_ascii_latin_letters_.containsSome(label_string))

363 return false;	389 return false;

364	390

365 if (!tls_index.initialized())	391 if (!tls_index.initialized())

366 tls_index.Initialize(&OnThreadTermination);	392 tls_index.Initialize(&OnThreadTermination);

367 icu::RegexMatcher* dangerous_pattern =	393 icu::RegexMatcher* dangerous_pattern =

368 reinterpret_cast<icu::RegexMatcher*>(tls_index.Get());	394 reinterpret_cast<icu::RegexMatcher*>(tls_index.Get());

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
400 "[a-z][\\u0585\\u0581]+[a-z]\|"	426 "[a-z][\\u0585\\u0581]+[a-z]\|"

401 "^[og]+[\\p{scx=armn}]\|[\\p{scx=armn}][og]+$\|"	427 "^[og]+[\\p{scx=armn}]\|[\\p{scx=armn}][og]+$\|"

402 "[\\p{scx=armn}][og]+[\\p{scx=armn}]", -1, US_INV),	428 "[\\p{scx=armn}][og]+[\\p{scx=armn}]", -1, US_INV),

403 0, status);	429 0, status);

404 tls_index.Set(dangerous_pattern);	430 tls_index.Set(dangerous_pattern);

405 }	431 }

406 dangerous_pattern->reset(label_string);	432 dangerous_pattern->reset(label_string);

407 return !dangerous_pattern->find();	433 return !dangerous_pattern->find();

408 }	434 }

409	435

	436 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic(

	437 const icu::UnicodeString& label_string) {

	438 // Collect all the Cyrillic letters in \|label_string\| and see if they're

	439 // a subset of \|cyrillic_letters_latin_alike_\|.

	440 // A shortcut of defining cyrillic_letters_latin_alike_ to include [0-9] and

	441 // [_-] and checking if the set contains all letters of \|label_string\|

	442 // would work in most cases, but not if a label has non-letters outside

	443 // ASCII.

	444 icu::UnicodeSet cyrillic_in_label;

	445 icu::StringCharacterIterator it(label_string);

	446 for (it.setToStart(); it.hasNext();) {

	447 const UChar32 c = it.next32PostInc();

	448 if (cyrillic_letters_.contains(c))

	449 cyrillic_in_label.add(c);

	450 }

	451 return !cyrillic_in_label.isEmpty() &&

	452 cyrillic_letters_latin_alike_.containsAll(cyrillic_in_label);

	453 }

	454

410 void IDNSpoofChecker::SetAllowedUnicodeSet(UErrorCode* status) {	455 void IDNSpoofChecker::SetAllowedUnicodeSet(UErrorCode* status) {

411 if (U_FAILURE(*status))	456 if (U_FAILURE(*status))

412 return;	457 return;

413	458

414 // The recommended set is a set of characters for identifiers in a	459 // The recommended set is a set of characters for identifiers in a

415 // security-sensitive environment taken from UTR 39	460 // security-sensitive environment taken from UTR 39

416 // (http://unicode.org/reports/tr39/) and	461 // (http://unicode.org/reports/tr39/) and

417 // http://www.unicode.org/Public/security/latest/xidmodifications.txt .	462 // http://www.unicode.org/Public/security/latest/xidmodifications.txt .

418 // The inclusion set comes from "Candidate Characters for Inclusion	463 // The inclusion set comes from "Candidate Characters for Inclusion

419 // in identifiers" of UTR 31 (http://www.unicode.org/reports/tr31). The list	464 // in identifiers" of UTR 31 (http://www.unicode.org/reports/tr31). The list

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
474 allowed_set.remove(0x2010u); // Hyphen	519 allowed_set.remove(0x2010u); // Hyphen

475 allowed_set.remove(0x2027u); // Hyphenation Point	520 allowed_set.remove(0x2027u); // Hyphenation Point

476	521

477 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status);	522 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status);

478 }	523 }

479	524

480 // Returns true if the given Unicode host component is safe to display to the	525 // Returns true if the given Unicode host component is safe to display to the

481 // user. Note that this function does not deal with pure ASCII domain labels at	526 // user. Note that this function does not deal with pure ASCII domain labels at

482 // all even though it's possible to make up look-alike labels with ASCII	527 // all even though it's possible to make up look-alike labels with ASCII

483 // characters alone.	528 // characters alone.

484 bool IsIDNComponentSafe(base::StringPiece16 label) {	529 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) {

485 return g_idn_spoof_checker.Get().Check(label);	530 return g_idn_spoof_checker.Get().Check(label, is_tld_ascii);

486 }	531 }

487	532

488 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to	533 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to

489 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().	534 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().

490 //	535 //

491 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with the	536 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with the

492 // backward compatibility in mind. What it does:	537 // backward compatibility in mind. What it does:

493 //	538 //

494 // 1. Use the up-to-date Unicode data.	539 // 1. Use the up-to-date Unicode data.

495 // 2. Define a case folding/mapping with the up-to-date Unicode data as in	540 // 2. Define a case folding/mapping with the up-to-date Unicode data as in

(...skipping 24 matching lines...) Expand all Loading...
520	565

521 base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;	566 base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;

522	567

523 // Converts one component (label) of a host (between dots) to Unicode if safe.	568 // Converts one component (label) of a host (between dots) to Unicode if safe.

524 // The result will be APPENDED to the given output string and will be the	569 // The result will be APPENDED to the given output string and will be the

525 // same as the input if it is not IDN in ACE/punycode or the IDN is unsafe to	570 // same as the input if it is not IDN in ACE/punycode or the IDN is unsafe to

526 // display.	571 // display.

527 // Returns whether any conversion was performed.	572 // Returns whether any conversion was performed.

528 bool IDNToUnicodeOneComponent(const base::char16* comp,	573 bool IDNToUnicodeOneComponent(const base::char16* comp,

529 size_t comp_len,	574 size_t comp_len,

	575 bool is_tld_ascii,

530 base::string16* out) {	576 base::string16* out) {

531 DCHECK(out);	577 DCHECK(out);

532 if (comp_len == 0)	578 if (comp_len == 0)

533 return false;	579 return false;

534	580

535 // Only transform if the input can be an IDN component.	581 // Only transform if the input can be an IDN component.

536 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};	582 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

537 if ((comp_len > arraysize(kIdnPrefix)) &&	583 if ((comp_len > arraysize(kIdnPrefix)) &&

538 !memcmp(comp, kIdnPrefix, sizeof(kIdnPrefix))) {	584 !memcmp(comp, kIdnPrefix, sizeof(kIdnPrefix))) {

539 UIDNA* uidna = g_uidna.Get().value;	585 UIDNA* uidna = g_uidna.Get().value;

(...skipping 11 matching lines...) Expand all Loading...
551 output_length = uidna_labelToUnicode(	597 output_length = uidna_labelToUnicode(

552 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],	598 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],

553 output_length, &info, &status);	599 output_length, &info, &status);

554 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));	600 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));

555	601

556 if (U_SUCCESS(status) && info.errors == 0) {	602 if (U_SUCCESS(status) && info.errors == 0) {

557 // Converted successfully. Ensure that the converted component	603 // Converted successfully. Ensure that the converted component

558 // can be safely displayed to the user.	604 // can be safely displayed to the user.

559 out->resize(original_length + output_length);	605 out->resize(original_length + output_length);

560 if (IsIDNComponentSafe(	606 if (IsIDNComponentSafe(

561 base::StringPiece16(out->data() + original_length,	607 base::StringPiece16(out->data() + original_length,

562 base::checked_cast<size_t>(output_length))))	608 base::checked_cast<size_t>(output_length)),

	609 is_tld_ascii))

563 return true;	610 return true;

564 }	611 }

565	612

566 // Something went wrong. Revert to original string.	613 // Something went wrong. Revert to original string.

567 out->resize(original_length);	614 out->resize(original_length);

568 }	615 }

569	616

570 // We get here with no IDN or on error, in which case we just append the	617 // We get here with no IDN or on error, in which case we just append the

571 // literal input.	618 // literal input.

572 out->append(comp, comp_len);	619 out->append(comp, comp_len);

(...skipping 226 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
799 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)	846 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)

800 ? text.substr(www.length()) : text;	847 ? text.substr(www.length()) : text;

801 }	848 }

802	849

803 base::string16 StripWWWFromHost(const GURL& url) {	850 base::string16 StripWWWFromHost(const GURL& url) {

804 DCHECK(url.is_valid());	851 DCHECK(url.is_valid());

805 return StripWWW(base::ASCIIToUTF16(url.host_piece()));	852 return StripWWW(base::ASCIIToUTF16(url.host_piece()));

806 }	853 }

807	854

808 } // namespace url_formatter	855 } // namespace url_formatter

OLD	NEW

« no previous file with comments | « no previous file | components/url_formatter/url_formatter_unittest.cc » ('j') | no next file with comments »