components/url_formatter/url_formatter.cc - Issue 2784933002: Mitigate spoofing attempt using Latin letters.

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2784933002: Mitigate spoofing attempt using Latin letters. (Closed)

Patch Set: pull IDNSpoofChecker to separate h/cc files Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« components/url_formatter/top_domains/top.list ('K') | « components/url_formatter/top_domains/top.list ('k') | components/url_formatter/url_formatter_unittest.cc » ('j') | components/url_formatter/url_formatter_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/url_formatter/url_formatter.h"	5 #include "components/url_formatter/url_formatter.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <utility>	8 #include <utility>

	9 #include <vector>

9	10

10 #include "base/lazy_instance.h"	11 #include "base/lazy_instance.h"

11 #include "base/macros.h"	12 #include "base/macros.h"

12 #include "base/numerics/safe_conversions.h"	13 #include "base/numerics/safe_conversions.h"

13 #include "base/strings/string_piece.h"	14 #include "base/strings/string_piece.h"

14 #include "base/strings/string_util.h"	15 #include "base/strings/string_util.h"

15 #include "base/strings/utf_offset_string_conversions.h"	16 #include "base/strings/utf_offset_string_conversions.h"

16 #include "base/strings/utf_string_conversions.h"	17 #include "base/strings/utf_string_conversions.h"

17 #include "base/threading/thread_local_storage.h"	18 #include "base/threading/thread_local_storage.h"

18 #include "third_party/icu/source/common/unicode/schriter.h"	19 #include "components/url_formatter/idn_spoof_checker.h"

19 #include "third_party/icu/source/common/unicode/uidna.h"	20 #include "third_party/icu/source/common/unicode/uidna.h"

20 #include "third_party/icu/source/common/unicode/uniset.h"	21 #include "third_party/icu/source/common/unicode/utypes.h"

21 #include "third_party/icu/source/common/unicode/uscript.h"

22 #include "third_party/icu/source/common/unicode/uvernum.h"

23 #include "third_party/icu/source/i18n/unicode/regex.h"

24 #include "third_party/icu/source/i18n/unicode/uspoof.h"

25 #include "url/gurl.h"	22 #include "url/gurl.h"

26 #include "url/third_party/mozilla/url_parse.h"	23 #include "url/third_party/mozilla/url_parse.h"

27	24

28 namespace url_formatter {	25 namespace url_formatter {

29	26

30 namespace {	27 namespace {

31	28

32 base::string16 IDNToUnicodeWithAdjustments(	29 base::string16 IDNToUnicodeWithAdjustments(

33 base::StringPiece host,	30 base::StringPiece host,

34 base::OffsetAdjuster::Adjustments* adjustments);	31 base::OffsetAdjuster::Adjustments* adjustments);

(...skipping 149 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
184 new_parsed->scheme.len = kViewSourceLength - 1;	181 new_parsed->scheme.len = kViewSourceLength - 1;

185 }	182 }

186 AdjustAllComponentsButScheme(kViewSourceLength, new_parsed);	183 AdjustAllComponentsButScheme(kViewSourceLength, new_parsed);

187	184

188 if (prefix_end)	185 if (prefix_end)

189 *prefix_end += kViewSourceLength;	186 *prefix_end += kViewSourceLength;

190	187

191 return result;	188 return result;

192 }	189 }

193	190

	191 base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =

	192 LAZY_INSTANCE_INITIALIZER;

	193 #if 0
	Peter Kasting 2017/05/10 22:38:47: Remove before landing. jungshik at Google 2017/05/14 09:36:23: Done. (In reply to Peter Kasting, 2017/05/10 22:38:47: "Remove before landing".)
	194 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER;

	195

	196 void OnThreadTermination(void* regex_matcher) {

	197 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher);

	198 }

	199 #endif

	200

194 // TODO(brettw): We may want to skip this step in the case of file URLs to	201 // TODO(brettw): We may want to skip this step in the case of file URLs to

195 // allow unicode UNC hostnames regardless of encodings.	202 // allow unicode UNC hostnames regardless of encodings.

196 base::string16 IDNToUnicodeWithAdjustments(	203 base::string16 IDNToUnicodeWithAdjustments(

197 base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) {	204 base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments) {

198 if (adjustments)	205 if (adjustments)

199 adjustments->clear();	206 adjustments->clear();

200 // Convert the ASCII input to a base::string16 for ICU.	207 // Convert the ASCII input to a base::string16 for ICU.

201 base::string16 input16;	208 base::string16 input16;

202 input16.reserve(host.length());	209 input16.reserve(host.length());

203 input16.insert(input16.end(), host.begin(), host.end());	210 input16.insert(input16.end(), host.begin(), host.end());

204	211

205 bool is_tld_ascii = true;	212 bool is_tld_ascii = true;

206 size_t last_dot = host.rfind('.');	213 size_t last_dot = host.rfind('.');

207 if (last_dot != base::StringPiece::npos &&	214 if (last_dot != base::StringPiece::npos &&

208 host.substr(last_dot).starts_with(".xn--")) {	215 host.substr(last_dot).starts_with(".xn--")) {

209 is_tld_ascii = false;	216 is_tld_ascii = false;

210 }	217 }

211	218

212 // Do each component of the host separately, since we enforce script matching	219 // Do each component of the host separately, since we enforce script matching

213 // on a per-component basis.	220 // on a per-component basis.

214 base::string16 out16;	221 base::string16 out16;

	222 bool has_idn_component = false;

215 for (size_t component_start = 0, component_end;	223 for (size_t component_start = 0, component_end;

216 component_start < input16.length();	224 component_start < input16.length();

217 component_start = component_end + 1) {	225 component_start = component_end + 1) {

218 // Find the end of the component.	226 // Find the end of the component.

219 component_end = input16.find('.', component_start);	227 component_end = input16.find('.', component_start);

220 if (component_end == base::string16::npos)	228 if (component_end == base::string16::npos)

221 component_end = input16.length(); // For getting the last component.	229 component_end = input16.length(); // For getting the last component.

222 size_t component_length = component_end - component_start;	230 size_t component_length = component_end - component_start;

223 size_t new_component_start = out16.length();	231 size_t new_component_start = out16.length();

224 bool converted_idn = false;	232 bool converted_idn = false;

225 if (component_end > component_start) {	233 if (component_end > component_start) {

226 // Add the substring that we just found.	234 // Add the substring that we just found.

227 converted_idn =	235 converted_idn =

228 IDNToUnicodeOneComponent(input16.data() + component_start,	236 IDNToUnicodeOneComponent(input16.data() + component_start,

229 component_length, is_tld_ascii, &out16);	237 component_length, is_tld_ascii, &out16);

	238 has_idn_component |= converted_idn;

230 }	239 }

231 size_t new_component_length = out16.length() - new_component_start;	240 size_t new_component_length = out16.length() - new_component_start;

232	241

233 if (converted_idn && adjustments) {	242 if (converted_idn && adjustments) {

234 adjustments->push_back(base::OffsetAdjuster::Adjustment(	243 adjustments->push_back(base::OffsetAdjuster::Adjustment(

235 component_start, component_length, new_component_length));	244 component_start, component_length, new_component_length));

236 }	245 }

237	246

238 // Need to add the dot we just found (if we found one).	247 // Need to add the dot we just found (if we found one).

239 if (component_end < input16.length())	248 if (component_end < input16.length())

240 out16.push_back('.');	249 out16.push_back('.');

241 }	250 }

	251

	252 // Leave as punycode any inputs that spoof top domains.

	253 if (has_idn_component &&

	254 g_idn_spoof_checker.Get().SimilarToTopDomains(out16)) {

	255 if (adjustments)

	256 adjustments->clear();

	257 return input16;

	258 }

	259

242 return out16;	260 return out16;

243 }	261 }

244	262

245 // A helper class for IDN Spoof checking, used to ensure that no IDN input is

246 // spoofable per Chromium's standard of spoofability. For a more thorough

247 // explanation of how spoof checking works in Chromium, see

248 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome .

249 class IDNSpoofChecker {

250 public:

251 IDNSpoofChecker();

252

253 // Returns true if \|label\| is safe to display as Unicode. When the TLD is

254 // ASCII, check if a label is entirely made of Cyrillic letters that look like

255 // Latin letters. In the event of library failure, all IDN inputs will be

256 // treated as unsafe.

257 bool Check(base::StringPiece16 label, bool is_tld_ascii);

258

259 private:

260 void SetAllowedUnicodeSet(UErrorCode* status);

261 bool IsMadeOfLatinAlikeCyrillic(const icu::UnicodeString& label_string);

262

263 USpoofChecker* checker_;

264 icu::UnicodeSet deviation_characters_;

265 icu::UnicodeSet non_ascii_latin_letters_;

266 icu::UnicodeSet kana_letters_exceptions_;

267 icu::UnicodeSet cyrillic_letters_;

268 icu::UnicodeSet cyrillic_letters_latin_alike_;

269

270 DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker);

271 };

272

273 base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =

274 LAZY_INSTANCE_INITIALIZER;

275 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER;

276

277 void OnThreadTermination(void* regex_matcher) {

278 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher);

279 }

280

281 IDNSpoofChecker::IDNSpoofChecker() {

282 UErrorCode status = U_ZERO_ERROR;

283 checker_ = uspoof_open(&status);

284 if (U_FAILURE(status)) {

285 checker_ = nullptr;

286 return;

287 }

288

289 // At this point, USpoofChecker has all the checks enabled except

290 // for USPOOF_CHAR_LIMIT (USPOOF_{RESTRICTION_LEVEL, INVISIBLE,

291 // MIXED_SCRIPT_CONFUSABLE, WHOLE_SCRIPT_CONFUSABLE, MIXED_NUMBERS, ANY_CASE})

292 // This default configuration is adjusted below as necessary.

293

294 // Set the restriction level to moderate. It allows mixing Latin with another

295 // script (+ COMMON and INHERITED). Except for Chinese(Han + Bopomofo),

296 // Japanese(Hiragana + Katakana + Han), and Korean(Hangul + Han), only one

297 // script other than Common and Inherited can be mixed with Latin. Cyrillic

298 // and Greek are not allowed to mix with Latin.

299 // See http://www.unicode.org/reports/tr39/#Restriction_Level_Detection

300 uspoof_setRestrictionLevel(checker_, USPOOF_MODERATELY_RESTRICTIVE);

301

302 // Restrict allowed characters in IDN labels and turn on USPOOF_CHAR_LIMIT.

303 SetAllowedUnicodeSet(&status);

304

305 // Enable the return of auxiliary (non-error) information.

306 // We used to disable WHOLE_SCRIPT_CONFUSABLE check explicitly, but as of

307 // ICU 58.1, WSC is a no-op in a single string check API.

308 int32_t checks = uspoof_getChecks(checker_, &status) | USPOOF_AUX_INFO;

309 uspoof_setChecks(checker_, checks, &status);

310

311 // Four characters handled differently by IDNA 2003 and IDNA 2008. UTS46

312 // transitional processing treats them as IDNA 2003 does; maps U+00DF and

313 // U+03C2 and drops U+200[CD].

314 deviation_characters_ =

315 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00df\\u03c2\\u200c\\u200d]"),

316 status);

317 deviation_characters_.freeze();

318

319 // Latin letters outside ASCII. 'Script_Extensions=Latin' is not necessary

320 // because additional characters pulled in with scx=Latn are not included in

321 // the allowed set.

322 non_ascii_latin_letters_ = icu::UnicodeSet(

323 UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status);

324 non_ascii_latin_letters_.freeze();

325

326 // These letters are parts of \|dangerous_patterns_\|.

327 kana_letters_exceptions_ = icu::UnicodeSet(

328 UNICODE_STRING_SIMPLE("[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb-\\u30fe]"),

329 status);

330 kana_letters_exceptions_.freeze();

331

332 // These Cyrillic letters look like Latin. A domain label entirely made of

333 // these letters is blocked as a simplified whole-script-spoofable.

334 cyrillic_letters_latin_alike_ =

335 icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status);

336 cyrillic_letters_latin_alike_.freeze();

337

338 cyrillic_letters_ =

339 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Cyrl:]]"), status);

340 cyrillic_letters_.freeze();

341

342 DCHECK(U_SUCCESS(status));

343 }

344

345 bool IDNSpoofChecker::Check(base::StringPiece16 label, bool is_tld_ascii) {

346 UErrorCode status = U_ZERO_ERROR;

347 int32_t result = uspoof_check(checker_, label.data(),

348 base::checked_cast<int32_t>(label.size()),

349 NULL, &status);

350 // If uspoof_check fails (due to library failure), or if any of the checks

351 // fail, treat the IDN as unsafe.

352 if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS))

353 return false;

354

355 icu::UnicodeString label_string(FALSE, label.data(),

356 base::checked_cast<int32_t>(label.size()));

357

358 // A punycode label with 'xn--' prefix is not subject to the URL

359 // canonicalization and is stored as it is in GURL. If it encodes a deviation

360 // character (UTS 46; e.g. U+00DF/sharp-s), it should be still shown in

361 // punycode instead of Unicode. Without this check, xn--fu-hia for

362 // 'fu<sharp-s>' would be converted to 'fu<sharp-s>' for display because

363 // "UTS 46 section 4 Processing step 4" applies validity criteria for

364 // non-transitional processing (i.e. do not map deviation characters) to any

365 // punycode labels regardless of whether transitional or non-transitional is

366 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted

367 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as

368 // such. See http://crbug.com/595263 .

369 if (deviation_characters_.containsSome(label_string))

370 return false;

371

372 // If there's no script mixing, the input is regarded as safe without any

373 // extra check unless it contains Kana letter exceptions or it's made entirely

374 // of Cyrillic letters that look like Latin letters. Note that the following

375 // combinations of scripts are treated as a 'logical' single script.

376 // - Chinese: Han, Bopomofo, Common

377 // - Japanese: Han, Hiragana, Katakana, Common

378 // - Korean: Hangul, Han, Common

379 result &= USPOOF_RESTRICTION_LEVEL_MASK;

380 if (result == USPOOF_ASCII) return true;

381 if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&

382 kana_letters_exceptions_.containsNone(label_string)) {

383 // Check Cyrillic confusable only for ASCII TLDs.

384 return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string);

385 }

386

387 // Additional checks for \|label\| with multiple scripts, one of which is Latin.

388 // Disallow non-ASCII Latin letters to mix with a non-Latin script.

389 if (non_ascii_latin_letters_.containsSome(label_string))

390 return false;

391

392 if (!tls_index.initialized())

393 tls_index.Initialize(&OnThreadTermination);

394 icu::RegexMatcher* dangerous_pattern =

395 reinterpret_cast<icu::RegexMatcher*>(tls_index.Get());

396 if (!dangerous_pattern) {

397 // Disallow the katakana no, so, zo, or n, as they may be mistaken for

398 // slashes when they're surrounded by non-Japanese scripts (i.e. scripts

399 // other than Katakana, Hiragana or Han). If {no, so, zo, n} next to a

400 // non-Japanese script on either side is disallowed, legitimate cases like

401 // '{vitamin in Katakana}b6' are blocked. Note that trying to block those

402 // characters when used alone as a label is futile because those cases

403 // would not reach here.

404 // Also disallow what used to be blocked by mixed-script-confusable (MSC)

405 // detection. ICU 58 does not detect MSC any more for a single input string.

406 // See http://bugs.icu-project.org/trac/ticket/12823 .

407 // TODO(jshin): adjust the pattern once the above ICU bug is fixed.

408 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana

409 // Prolonged Sound) used out-of-context.

410 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark)

411 // unless they're preceded by a Katakana.

412 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters

413 // (U+30D[8-A]) that look exactly like each other when they're used in a

414 // label otherwise entirely in Katakana or Hiragana.

415 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small

416 // Letter Co) to be next to Latin.

417 // - Disallow Latin 'o' and 'g' next to Armenian.

418 dangerous_pattern = new icu::RegexMatcher(

419 icu::UnicodeString(

420 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]"

421 "[\\u30ce\\u30f3\\u30bd\\u30be]"

422 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|"

423 "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|"

424 "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|"

425 "^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|"

426 "^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|"

427 "[a-z]\\u30fb|\\u30fb[a-z]|"

428 "^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|"

429 "[a-z][\\u0585\\u0581]+[a-z]|"

430 "^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|"

431 "[\\p{scx=armn}][og]+[\\p{scx=armn}]",

432 -1, US_INV),

433 0, status);

434 tls_index.Set(dangerous_pattern);

435 }

436 dangerous_pattern->reset(label_string);

437 return !dangerous_pattern->find();

438 }

439

440 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic(

441 const icu::UnicodeString& label_string) {

442 // Collect all the Cyrillic letters in \|label_string\| and see if they're

443 // a subset of \|cyrillic_letters_latin_alike_\|.

444 // A shortcut of defining cyrillic_letters_latin_alike_ to include [0-9] and

445 // [_-] and checking if the set contains all letters of \|label_string\|

446 // would work in most cases, but not if a label has non-letters outside

447 // ASCII.

448 icu::UnicodeSet cyrillic_in_label;

449 icu::StringCharacterIterator it(label_string);

450 for (it.setToStart(); it.hasNext();) {

451 const UChar32 c = it.next32PostInc();

452 if (cyrillic_letters_.contains(c))

453 cyrillic_in_label.add(c);

454 }

455 return !cyrillic_in_label.isEmpty() &&

456 cyrillic_letters_latin_alike_.containsAll(cyrillic_in_label);

457 }

458

459 void IDNSpoofChecker::SetAllowedUnicodeSet(UErrorCode* status) {

460 if (U_FAILURE(*status))

461 return;

462

463 // The recommended set is a set of characters for identifiers in a

464 // security-sensitive environment taken from UTR 39

465 // (http://unicode.org/reports/tr39/) and

466 // http://www.unicode.org/Public/security/latest/xidmodifications.txt .

467 // The inclusion set comes from "Candidate Characters for Inclusion

468 // in identifiers" of UTR 31 (http://www.unicode.org/reports/tr31). The list

469 // may change over the time and will be updated whenever the version of ICU

470 // used in Chromium is updated.

471 const icu::UnicodeSet* recommended_set =

472 uspoof_getRecommendedUnicodeSet(status);

473 icu::UnicodeSet allowed_set;

474 allowed_set.addAll(*recommended_set);

475 const icu::UnicodeSet* inclusion_set = uspoof_getInclusionUnicodeSet(status);

476 allowed_set.addAll(*inclusion_set);

477

478 // Five aspirational scripts are taken from UTR 31 Table 6 at

479 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts .

480 // Not all the characters of aspirational scripts are suitable for

481 // identifiers. Therefore, only characters belonging to

482 // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational'

483 // section at

484 // http://www.unicode.org/Public/security/latest/xidmodifications.txt) are

485 // added to the allowed set. The list has to be updated when a new

486 // version of Unicode is released. The current version is 9.0.0 and ICU 60

487 // will have Unicode 10.0 data.

488 #if U_ICU_VERSION_MAJOR_NUM < 60

489 const icu::UnicodeSet aspirational_scripts(

490 icu::UnicodeString(

491 // Unified Canadian Syllabics

492 "[\\u1401-\\u166C\\u166F-\\u167F"

493 // Mongolian

494 "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA"

495 // Unified Canadian Syllabics

496 "\\u18B0-\\u18F5"

497 // Tifinagh

498 "\\u2D30-\\u2D67\\u2D7F"

499 // Yi

500 "\\uA000-\\uA48C"

501 // Miao

502 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7E"

503 "\\U00016F8F-\\U00016F9F]",

504 -1, US_INV),

505 *status);

506 allowed_set.addAll(aspirational_scripts);

507 #else

508 #error "Update aspirational_scripts per Unicode 10.0"

509 #endif

510

511 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in

512 // the inclusion set. However, they are blacklisted as a part of Mozilla's

513 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars).

514 // U+2010 is in the inclusion set, but we drop it because it can be confused

515 // with an ASCII U+002D (Hyphen-Minus).

516 // U+0338 and U+2027 are dropped; the former can look like a slash when

517 // rendered with a broken font, and the latter can be confused with U+30FB

518 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept,

519 // even though it can look like a double quotation mark. Using it in Hebrew

520 // should be safe. When used with a non-Hebrew script, it'd be filtered by

521 // other checks in place.

522 allowed_set.remove(0x338u); // Combining Long Solidus Overlay

523 allowed_set.remove(0x2010u); // Hyphen

524 allowed_set.remove(0x2027u); // Hyphenation Point

525

526 #if defined(OS_MACOSX)

527 // The following characters are reported as present in the default macOS

528 // system UI font, but they render as blank. Remove them from the allowed

529 // set to prevent spoofing.

530 // Tibetan characters used for transliteration of ancient texts:

531 allowed_set.remove(0x0F8Cu);

532 allowed_set.remove(0x0F8Du);

533 allowed_set.remove(0x0F8Eu);

534 allowed_set.remove(0x0F8Fu);

535 #endif

536

537 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status);

538 }

539	263

540 // Returns true if the given Unicode host component is safe to display to the	264 // Returns true if the given Unicode host component is safe to display to the

541 // user. Note that this function does not deal with pure ASCII domain labels at	265 // user. Note that this function does not deal with pure ASCII domain labels at

542 // all even though it's possible to make up look-alike labels with ASCII	266 // all even though it's possible to make up look-alike labels with ASCII

543 // characters alone.	267 // characters alone.

544 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) {	268 bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) {

545 return g_idn_spoof_checker.Get().Check(label, is_tld_ascii);	269 return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label, is_tld_ascii);

546 }	270 }

547	271

548 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to	272 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to

549 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().	273 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().

550 //	274 //

551 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with the	275 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with the

552 // backward compatibility in mind. What it does:	276 // backward compatibility in mind. What it does:

553 //	277 //

554 // 1. Use the up-to-date Unicode data.	278 // 1. Use the up-to-date Unicode data.

555 // 2. Define a case folding/mapping with the up-to-date Unicode data as in	279 // 2. Define a case folding/mapping with the up-to-date Unicode data as in

(...skipping 305 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
861 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)	585 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)

862 ? text.substr(www.length()) : text;	586 ? text.substr(www.length()) : text;

863 }	587 }

864	588

865 base::string16 StripWWWFromHost(const GURL& url) {	589 base::string16 StripWWWFromHost(const GURL& url) {

866 DCHECK(url.is_valid());	590 DCHECK(url.is_valid());

867 return StripWWW(base::ASCIIToUTF16(url.host_piece()));	591 return StripWWW(base::ASCIIToUTF16(url.host_piece()));

868 }	592 }

869	593

870 } // namespace url_formatter	594 } // namespace url_formatter

OLD	NEW