components/url_formatter/url_formatter.cc - Issue 2683793010: Block domain labels made of Cyrillic letters that look alike Latin

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2683793010: Block domain labels made of Cyrillic letters that look alike Latin (Closed)

Patch Set: check Cyrl-Latn alikes only for non-IDN tlds Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/url_formatter/url_formatter.h"	5 #include "components/url_formatter/url_formatter.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <utility>	8 #include <utility>

9	9

10 #include "base/lazy_instance.h"	10 #include "base/lazy_instance.h"

(...skipping 13 matching lines...) Expand all Loading...
24 #include "url/gurl.h"	24 #include "url/gurl.h"

25 #include "url/third_party/mozilla/url_parse.h"	25 #include "url/third_party/mozilla/url_parse.h"

26	26

27 namespace url_formatter {	27 namespace url_formatter {

28	28

29 namespace {	29 namespace {

30	30

31 base::string16 IDNToUnicodeWithAdjustments(	31 base::string16 IDNToUnicodeWithAdjustments(

32 base::StringPiece host,	32 base::StringPiece host,

33 base::OffsetAdjuster::Adjustments* adjustments);	33 base::OffsetAdjuster::Adjustments* adjustments);

34 bool IDNToUnicodeOneComponent(const base::char16* comp,	34 bool IDNToUnicodeOneComponent(const base::char16* comp, size_t comp_len,

35 size_t comp_len,	35 bool is_tld_ascii, base::string16* out);

36 base::string16* out);

37	36

38 class AppendComponentTransform {	37 class AppendComponentTransform {

39 public:	38 public:

40 AppendComponentTransform() {}	39 AppendComponentTransform() {}

41 virtual ~AppendComponentTransform() {}	40 virtual ~AppendComponentTransform() {}

42	41

43 virtual base::string16 Execute(	42 virtual base::string16 Execute(

44 const std::string& component_text,	43 const std::string& component_text,

45 base::OffsetAdjuster::Adjustments* adjustments) const = 0;	44 base::OffsetAdjuster::Adjustments* adjustments) const = 0;

46	45

(...skipping 146 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
193 // allow unicode UNC hostnames regardless of encodings.	192 // allow unicode UNC hostnames regardless of encodings.

194 base::string16 IDNToUnicodeWithAdjustments(	193 base::string16 IDNToUnicodeWithAdjustments(

195 base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) {	194 base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) {

196 if (adjustments)	195 if (adjustments)

197 adjustments->clear();	196 adjustments->clear();

198 // Convert the ASCII input to a base::string16 for ICU.	197 // Convert the ASCII input to a base::string16 for ICU.

199 base::string16 input16;	198 base::string16 input16;

200 input16.reserve(host.length());	199 input16.reserve(host.length());

201 input16.insert(input16.end(), host.begin(), host.end());	200 input16.insert(input16.end(), host.begin(), host.end());

202	201

	202 bool is_tld_ascii = true;

	203 size_t last_dot = host.rfind('.');

	204 static const char* kAcePrefix = "xn--";

	205 const size_t kAcePrefixLen = 4;

	206 if (last_dot != base::StringPiece::npos &&

	207 last_dot + kAcePrefixLen < host.length() &&

	208 memcmp(kAcePrefix, host.data() + last_dot + 1, kAcePrefixLen) == 0) {

	209 is_tld_ascii = false;
	sffc 2017/02/15 19:57:30 I don't really understand what this part of the code is doing. jungshik at Google 2017/02/15 20:55:52 On 2017/02/15 19:57:30, sffc wrote: > I don't really understand what this part of the code is doing. It's checking if the TLD starts with 'xn--'. Otherwise, it assumes that it's a non-IDN TLD. Note that |host| is in ACE at this point.
	210 }

	211

203 // Do each component of the host separately, since we enforce script matching	212 // Do each component of the host separately, since we enforce script matching

204 // on a per-component basis.	213 // on a per-component basis.

205 base::string16 out16;	214 base::string16 out16;

206 for (size_t component_start = 0, component_end;	215 for (size_t component_start = 0, component_end;

207 component_start < input16.length();	216 component_start < input16.length();

208 component_start = component_end + 1) {	217 component_start = component_end + 1) {

209 // Find the end of the component.	218 // Find the end of the component.

210 component_end = input16.find('.', component_start);	219 component_end = input16.find('.', component_start);

211 if (component_end == base::string16::npos)	220 if (component_end == base::string16::npos)

212 component_end = input16.length(); // For getting the last component.	221 component_end = input16.length(); // For getting the last component.

213 size_t component_length = component_end - component_start;	222 size_t component_length = component_end - component_start;

214 size_t new_component_start = out16.length();	223 size_t new_component_start = out16.length();

215 bool converted_idn = false;	224 bool converted_idn = false;

216 if (component_end > component_start) {	225 if (component_end > component_start) {

217 // Add the substring that we just found.	226 // Add the substring that we just found.

218 converted_idn =	227 converted_idn =

219 IDNToUnicodeOneComponent(input16.data() + component_start,	228 IDNToUnicodeOneComponent(input16.data() + component_start,

220 component_length, &out16);	229 component_length, is_tld_ascii, &out16);

221 }	230 }

222 size_t new_component_length = out16.length() - new_component_start;	231 size_t new_component_length = out16.length() - new_component_start;

223	232

224 if (converted_idn && adjustments) {	233 if (converted_idn && adjustments) {

225 adjustments->push_back(base::OffsetAdjuster::Adjustment(	234 adjustments->push_back(base::OffsetAdjuster::Adjustment(

226 component_start, component_length, new_component_length));	235 component_start, component_length, new_component_length));

227 }	236 }

228	237

229 // Need to add the dot we just found (if we found one).	238 // Need to add the dot we just found (if we found one).

230 if (component_end < input16.length())	239 if (component_end < input16.length())

231 out16.push_back('.');	240 out16.push_back('.');

232 }	241 }

233 return out16;	242 return out16;

234 }	243 }

235	244

236 // A helper class for IDN Spoof checking, used to ensure that no IDN input is	245 // A helper class for IDN Spoof checking, used to ensure that no IDN input is

237 // spoofable per Chromium's standard of spoofability. For a more thorough	246 // spoofable per Chromium's standard of spoofability. For a more thorough

238 // explanation of how spoof checking works in Chromium, see	247 // explanation of how spoof checking works in Chromium, see

239 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome .	248 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome .

240 class IDNSpoofChecker {	249 class IDNSpoofChecker {

241 public:	250 public:

242 IDNSpoofChecker();	251 IDNSpoofChecker();

243	252

244 // Returns true if |label| is safe to display as Unicode. In the event of	253 // Returns true if |label| is safe to display as Unicode. When

245 // library failure, all IDN inputs will be treated as unsafe.	254 // TLD is ASCII, check if a label is entirely made of

246 bool Check(base::StringPiece16 label);	255 // Cyrillic letters that look like Latin letters. In the event of library

	256 // failure, all IDN inputs will be treated as unsafe.

	257 bool Check(base::StringPiece16 label, bool is_tld_ascii);

247	258

248 private:	259 private:

249 void SetAllowedUnicodeSet(UErrorCode* status);	260 void SetAllowedUnicodeSet(UErrorCode* status);

250	261

251 USpoofChecker* checker_;	262 USpoofChecker* checker_;

252 icu::UnicodeSet deviation_characters_;	263 icu::UnicodeSet deviation_characters_;

253 icu::UnicodeSet non_ascii_latin_letters_;	264 icu::UnicodeSet non_ascii_latin_letters_;

254 icu::UnicodeSet kana_letters_exceptions_;	265 icu::UnicodeSet kana_letters_exceptions_;

	266 icu::UnicodeSet cyrillic_letters_latin_alike_;

255	267

256 DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker);	268 DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker);

257 };	269 };

258	270

259 base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =	271 base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =

260 LAZY_INSTANCE_INITIALIZER;	272 LAZY_INSTANCE_INITIALIZER;

261 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER;	273 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER;

262	274

263 void OnThreadTermination(void* regex_matcher) {	275 void OnThreadTermination(void* regex_matcher) {

264 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher);	276 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher);

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
306 // because additional characters pulled in with scx=Latn are not included in	318 // because additional characters pulled in with scx=Latn are not included in

307 // the allowed set.	319 // the allowed set.

308 non_ascii_latin_letters_ = icu::UnicodeSet(	320 non_ascii_latin_letters_ = icu::UnicodeSet(

309 UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status);	321 UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status);

310 non_ascii_latin_letters_.freeze();	322 non_ascii_latin_letters_.freeze();

311	323

312 // These letters are parts of |dangerous_patterns_|.	324 // These letters are parts of |dangerous_patterns_|.

313 kana_letters_exceptions_ = icu::UnicodeSet(UNICODE_STRING_SIMPLE(	325 kana_letters_exceptions_ = icu::UnicodeSet(UNICODE_STRING_SIMPLE(

314 "[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb\\u30fc]"), status);	326 "[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb\\u30fc]"), status);

315 kana_letters_exceptions_.freeze();	327 kana_letters_exceptions_.freeze();

	328 // These Cyrillic letters look like Latin letters. A domain label entirely

	329 // made of these letters is blocked as a poor man's whole-script-spoofable check.

	330 cyrillic_letters_latin_alike_ = icu::UnicodeSet(

	331 icu::UnicodeString("[аеорсухьѕіјһмӏтнв]"), status);
	jungshik at Google 2017/02/15 18:50:21 "м, т, н, в" look like smallcap Latin and it's deb "м, т, н, в" look like smallcap Latin and it's debatable whether or not to include them in the set. sffc 2017/02/15 19:57:30 I'd look at the capital letters too. Here's a pos Show quoted text On 2017/02/15 18:50:21, jungshik at Google wrote: > "м, т, н, в" look like smallcap Latin and it's debatable whether or not to > include them in the set. I'd look at the capital letters too. Here's a possible list of strong matches based on manual inspection of confusables.txt, which includes more characters than your list: асԁеһіјӏорԛѕԝхуАВСЕԌНІӀЈКМОРЅТԜХҮ And here's a possibly incomplete list of weaker matches; up to you whether you want to include them: ӕъЬвҽвԍнкпгмтцѵѡүӔѴҲ lgarron 2017/02/15 20:24:49 Also consider ԁ, Ӏ, and maybe ѵ, listed at the bot Also consider ԁ, Ӏ, and maybe ѵ, listed at the bottom of [1]. [1] https://en.wikipedia.org/wiki/IDN_homograph_attack#Cyrillic lgarron 2017/02/15 20:24:49 Also consider ԁ, Ӏ, and maybe ѵ, listed at the bot Also consider ԁ, Ӏ, and maybe ѵ, listed at the bottom of [1]. [1] https://en.wikipedia.org/wiki/IDN_homograph_attack#Cyrillic lgarron 2017/02/15 20:25:56 (Note that Ӏ is a spoof of l, not just I.) Show quoted text On 2017/02/15 at 20:24:49, lgarron wrote: > Also consider ԁ, Ӏ, and maybe ѵ, listed at the bottom of [1]. > > > [1] https://en.wikipedia.org/wiki/IDN_homograph_attack#Cyrillic (Note that Ӏ is a spoof of l, not just I.) jungshik at Google 2017/02/15 20:55:52 Well, uppercase letters will not 'survive' (they'l Show quoted text On 2017/02/15 19:57:30, sffc wrote: > On 2017/02/15 18:50:21, jungshik at Google wrote: > > "м, т, н, в" look like smallcap Latin and it's debatable whether or not to > > include them in the set. > > I'd look at the capital letters too. 
Here's a possible list of strong matches > based on manual inspection of confusables.txt, which includes more characters > than your list: > > асԁеһіјӏорԛѕԝхуАВСЕԌНІӀЈКМОРЅТԜХҮ Well, uppercase letters will not 'survive' (they'll all case-mapped to lowercase letters before being displayed). And, one cannot register domains with uppercase letters with most (if not all) registrars. For instance, you cannot in Verisign-controlled TLDs. Show quoted text > And here's a possibly incomplete list of weaker matches; up to you whether you > want to include them: > > ӕъЬвҽвԍнкпгмтцѵѡүӔѴҲ Thank you for the list. Expanding the set has a risk of having too many false positives. The list is currently in flux and I'll try various sets and see how they work. jungshik at Google 2017/02/15 20:55:52 Thanks. U+0501 and U+0475 I'll consider. As for U+ Show quoted text On 2017/02/15 20:25:56, lgarron wrote: > On 2017/02/15 at 20:24:49, lgarron wrote: > > Also consider ԁ, Ӏ, and maybe ѵ, listed at the bottom of [1]. Thanks. U+0501 and U+0475 I'll consider. As for U+04C0, see below. Show quoted text > > > > > > [1] https://en.wikipedia.org/wiki/IDN_homograph_attack#Cyrillic > > (Note that Ӏ is a spoof of l, not just I.) U+04C0 ( 'Ӏ') is uppercase so that it will be normalized away to lowercase (U+04CF that is already included) before being displayed.
	332 cyrillic_letters_latin_alike_.freeze();

316	333

317 DCHECK(U_SUCCESS(status));	334 DCHECK(U_SUCCESS(status));

318 }	335 }

319	336

320 bool IDNSpoofChecker::Check(base::StringPiece16 label) {	337 bool IDNSpoofChecker::Check(base::StringPiece16 label,

	338 bool is_tld_ascii) {

321 UErrorCode status = U_ZERO_ERROR;	339 UErrorCode status = U_ZERO_ERROR;

322 int32_t result = uspoof_check(checker_, label.data(),	340 int32_t result = uspoof_check(checker_, label.data(),

323 base::checked_cast<int32_t>(label.size()),	341 base::checked_cast<int32_t>(label.size()),

324 NULL, &status);	342 NULL, &status);

325 // If uspoof_check fails (due to library failure), or if any of the checks	343 // If uspoof_check fails (due to library failure), or if any of the checks

326 // fail, treat the IDN as unsafe.	344 // fail, treat the IDN as unsafe.

327 if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS))	345 if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS))

328 return false;	346 return false;

329	347

330 icu::UnicodeString label_string(FALSE, label.data(),	348 icu::UnicodeString label_string(FALSE, label.data(),

331 base::checked_cast<int32_t>(label.size()));	349 base::checked_cast<int32_t>(label.size()));

332	350

333 // A punycode label with 'xn--' prefix is not subject to the URL	351 // A punycode label with 'xn--' prefix is not subject to the URL

334 // canonicalization and is stored as it is in GURL. If it encodes a deviation	352 // canonicalization and is stored as it is in GURL. If it encodes a deviation

335 // character (UTS 46; e.g. U+00DF/sharp-s), it should be still shown in	353 // character (UTS 46; e.g. U+00DF/sharp-s), it should be still shown in

336 // punycode instead of Unicode. Without this check, xn--fu-hia for	354 // punycode instead of Unicode. Without this check, xn--fu-hia for

337 // 'fu<sharp-s>' would be converted to 'fu<sharp-s>' for display because	355 // 'fu<sharp-s>' would be converted to 'fu<sharp-s>' for display because

338 // "UTS 46 section 4 Processing step 4" applies validity criteria for	356 // "UTS 46 section 4 Processing step 4" applies validity criteria for

339 // non-transitional processing (i.e. do not map deviation characters) to any	357 // non-transitional processing (i.e. do not map deviation characters) to any

340 // punycode labels regardless of whether transitional or non-transitional is	358 // punycode labels regardless of whether transitional or non-transitional is

341 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted	359 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted

342 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as	360 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as

343 // such. See http://crbug.com/595263 .	361 // such. See http://crbug.com/595263 .

344 if (deviation_characters_.containsSome(label_string))	362 if (deviation_characters_.containsSome(label_string))

345 return false;	363 return false;

346	364

347 // If there's no script mixing, the input is regarded as safe without any	365 // If there's no script mixing, the input is regarded as safe without any

348 // extra check unless it contains Kana letter exceptions. Note that	366 // extra check unless it contains Kana letter exceptions or it's made entirely

	367 // of Cyrillic letters that look like Latin letters. Note that

349 // the following combinations of scripts are treated as a 'logical' single	368 // the following combinations of scripts are treated as a 'logical' single

350 // script.	369 // script.

351 // - Chinese: Han, Bopomofo, Common	370 // - Chinese: Han, Bopomofo, Common

352 // - Japanese: Han, Hiragana, Katakana, Common	371 // - Japanese: Han, Hiragana, Katakana, Common

353 // - Korean: Hangul, Han, Common	372 // - Korean: Hangul, Han, Common

354 result &= USPOOF_RESTRICTION_LEVEL_MASK;	373 result &= USPOOF_RESTRICTION_LEVEL_MASK;

355 if (result == USPOOF_ASCII ||	374 if (result == USPOOF_ASCII) return true;

356 (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&	375 // Check Cyrillic confusable only for ASCII TLDs.

357 kana_letters_exceptions_.containsNone(label_string)))	376 if (is_tld_ascii && cyrillic_letters_latin_alike_.containsAll(label_string))
	sffc 2017/02/15 19:57:30 I think you should compare only the letter characters. For example, a spoof string "рох-рох" that contains a non-letter character should still be considered a spoof attack. jungshik at Google 2017/02/15 20:55:52 On 2017/02/15 19:57:30, sffc wrote: > I think you should compare only the letter characters. For example, a spoof > string "рох-рох" that contains a non-letter character should still be considered > a spoof attack. That's a good point. Thanks !
	377 return false;

	378 if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&

	379 kana_letters_exceptions_.containsNone(label_string))

358 return true;	380 return true;

359	381

360 // Additional checks for |label| with multiple scripts, one of which is Latin.	382 // Additional checks for |label| with multiple scripts, one of which is Latin.

361 // Disallow non-ASCII Latin letters to mix with a non-Latin script.	383 // Disallow non-ASCII Latin letters to mix with a non-Latin script.

362 if (non_ascii_latin_letters_.containsSome(label_string))	384 if (non_ascii_latin_letters_.containsSome(label_string))

363 return false;	385 return false;

364	386

365 if (!tls_index.initialized())	387 if (!tls_index.initialized())

366 tls_index.Initialize(&OnThreadTermination);	388 tls_index.Initialize(&OnThreadTermination);

367 icu::RegexMatcher* dangerous_pattern =	389 icu::RegexMatcher* dangerous_pattern =

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
474 allowed_set.remove(0x2010u); // Hyphen	496 allowed_set.remove(0x2010u); // Hyphen

475 allowed_set.remove(0x2027u); // Hyphenation Point	497 allowed_set.remove(0x2027u); // Hyphenation Point

476	498

477 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status);	499 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status);

478 }	500 }

479	501

480 // Returns true if the given Unicode host component is safe to display to the	502 // Returns true if the given Unicode host component is safe to display to the

481 // user. Note that this function does not deal with pure ASCII domain labels at	503 // user. Note that this function does not deal with pure ASCII domain labels at

482 // all even though it's possible to make up look-alike labels with ASCII	504 // all even though it's possible to make up look-alike labels with ASCII

483 // characters alone.	505 // characters alone.

484 bool IsIDNComponentSafe(base::StringPiece16 label) {	506 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) {

485 return g_idn_spoof_checker.Get().Check(label);	507 return g_idn_spoof_checker.Get().Check(label, is_tld_ascii);

486 }	508 }

487	509

488 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to	510 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to

489 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().	511 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().

490 //	512 //

491 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with the	513 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with the

492 // backward compatibility in mind. What it does:	514 // backward compatibility in mind. What it does:

493 //	515 //

494 // 1. Use the up-to-date Unicode data.	516 // 1. Use the up-to-date Unicode data.

495 // 2. Define a case folding/mapping with the up-to-date Unicode data as in	517 // 2. Define a case folding/mapping with the up-to-date Unicode data as in

(...skipping 24 matching lines...) Expand all Loading...
520	542

521 base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;	543 base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;

522	544

523 // Converts one component (label) of a host (between dots) to Unicode if safe.	545 // Converts one component (label) of a host (between dots) to Unicode if safe.

524 // The result will be APPENDED to the given output string and will be the	546 // The result will be APPENDED to the given output string and will be the

525 // same as the input if it is not IDN in ACE/punycode or the IDN is unsafe to	547 // same as the input if it is not IDN in ACE/punycode or the IDN is unsafe to

526 // display.	548 // display.

527 // Returns whether any conversion was performed.	549 // Returns whether any conversion was performed.

528 bool IDNToUnicodeOneComponent(const base::char16* comp,	550 bool IDNToUnicodeOneComponent(const base::char16* comp,

529 size_t comp_len,	551 size_t comp_len,

	552 bool is_tld_ascii,

530 base::string16* out) {	553 base::string16* out) {

531 DCHECK(out);	554 DCHECK(out);

532 if (comp_len == 0)	555 if (comp_len == 0)

533 return false;	556 return false;

534	557

535 // Only transform if the input can be an IDN component.	558 // Only transform if the input can be an IDN component.

536 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};	559 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

537 if ((comp_len > arraysize(kIdnPrefix)) &&	560 if ((comp_len > arraysize(kIdnPrefix)) &&

538 !memcmp(comp, kIdnPrefix, sizeof(kIdnPrefix))) {	561 !memcmp(comp, kIdnPrefix, sizeof(kIdnPrefix))) {

539 UIDNA* uidna = g_uidna.Get().value;	562 UIDNA* uidna = g_uidna.Get().value;

(...skipping 12 matching lines...) Expand all Loading...
552 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],	575 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],

553 output_length, &info, &status);	576 output_length, &info, &status);

554 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));	577 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));

555	578

556 if (U_SUCCESS(status) && info.errors == 0) {	579 if (U_SUCCESS(status) && info.errors == 0) {

557 // Converted successfully. Ensure that the converted component	580 // Converted successfully. Ensure that the converted component

558 // can be safely displayed to the user.	581 // can be safely displayed to the user.

559 out->resize(original_length + output_length);	582 out->resize(original_length + output_length);

560 if (IsIDNComponentSafe(	583 if (IsIDNComponentSafe(

561 base::StringPiece16(out->data() + original_length,	584 base::StringPiece16(out->data() + original_length,

562 base::checked_cast<size_t>(output_length))))	585 base::checked_cast<size_t>(output_length)),

	586 is_tld_ascii))

563 return true;	587 return true;

564 }	588 }

565	589

566 // Something went wrong. Revert to original string.	590 // Something went wrong. Revert to original string.

567 out->resize(original_length);	591 out->resize(original_length);

568 }	592 }

569	593

570 // We get here with no IDN or on error, in which case we just append the	594 // We get here with no IDN or on error, in which case we just append the

571 // literal input.	595 // literal input.

572 out->append(comp, comp_len);	596 out->append(comp, comp_len);

(...skipping 226 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
799 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)	823 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)

800 ? text.substr(www.length()) : text;	824 ? text.substr(www.length()) : text;

801 }	825 }

802	826

803 base::string16 StripWWWFromHost(const GURL& url) {	827 base::string16 StripWWWFromHost(const GURL& url) {

804 DCHECK(url.is_valid());	828 DCHECK(url.is_valid());

805 return StripWWW(base::ASCIIToUTF16(url.host_piece()));	829 return StripWWW(base::ASCIIToUTF16(url.host_piece()));

806 }	830 }

807	831

808 } // namespace url_formatter	832 } // namespace url_formatter

OLD	NEW

« no previous file with comments | « no previous file | components/url_formatter/url_formatter_unittest.cc » ('j') | no next file with comments »