Chromium Code Reviews| Index: components/url_formatter/idn_spoof_checker.h |
| diff --git a/components/url_formatter/idn_spoof_checker.h b/components/url_formatter/idn_spoof_checker.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..a1b25e5ce7f8e84ac4ad1ccb1462c42c78915f02 |
| --- /dev/null |
| +++ b/components/url_formatter/idn_spoof_checker.h |
| @@ -0,0 +1,68 @@ |
| +// Copyright 2017 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +// A helper class for IDN Spoof checking, used to ensure that no IDN input is |
|
Peter Kasting
2017/05/10 22:38:46
Nit: Maybe this should be just above the class itself… [comment truncated]
jungshik at Google
2017/05/14 09:36:23
Done.
|
| +// spoofable per Chromium's standard of spoofability. For a more thorough |
| +// explanation of how spoof checking works in Chromium, see |
| +// http://dev.chromium.org/developers/design-documents/idn-in-google-chrome . |
| + |
| +#ifndef COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ |
| +#define COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ |
| + |
| +#include <memory> |
| +#include <string> |
| + |
| +#include "base/macros.h" |
| +#include "base/strings/string16.h" |
| +#include "base/strings/string_piece.h" |
| +#include "third_party/icu/source/common/unicode/uniset.h" |
| +#include "third_party/icu/source/common/unicode/unistr.h" |
| +#include "third_party/icu/source/common/unicode/utypes.h" |
| +#include "third_party/icu/source/i18n/unicode/regex.h" |
| +#include "third_party/icu/source/i18n/unicode/translit.h" |
| +#include "third_party/icu/source/i18n/unicode/uspoof.h" |
|
Peter Kasting
2017/05/10 22:38:46
Nit: Seems like most of these #includes are not needed… [comment truncated]
jungshik at Google
2017/05/14 09:36:23
Actually, I tried forward declarations before fall
|
| + |
| +namespace url_formatter { |
| + |
| +class IDNSpoofChecker { |
| + public: |
| + IDNSpoofChecker(); |
| + ~IDNSpoofChecker(); |
| + |
| + // Returns true if |label| is safe to display as Unicode. In the event of |
| + // library failure, all IDN inputs will be treated as unsafe. |
| + // See the function body for details on the specific safety checks performed. NOTE(review): |is_tld_ascii| is not described here; presumably it tells whether the hostname's top-level domain is ASCII -- confirm against the .cc file. |
| + bool SafeToDisplayAsUnicode(base::StringPiece16 label, bool is_tld_ascii); |
| + |
| + // Returns true if |hostname| or the last few components of |hostname| looks |
| + // similar to one of top domains listed in top_domains/alexa_domains.list. Two |
| + // checks are done: |
| + // 1. Calculate the skeleton of |hostname| based on the Unicode confusable |
| + // character list and look it up in the pre-calculated skeleton list of |
| + // top domains. |
| + // 2. Look up the diacritic-free version of |hostname| in the list of |
| + // top domains. Note that non-IDN hostnames will not get here. |
| + bool SimilarToTopDomains(base::StringPiece16 hostname); |
| + |
| + private: |
| + // Sets allowed characters in IDN labels and turns on USPOOF_CHAR_LIMIT. NOTE(review): the two helpers declared after this one, IsMadeOfLatinAlikeCyrillic() and GetSkeleton(), have no descriptions yet; judging by its name and the SimilarToTopDomains() comment, GetSkeleton() presumably writes the confusable skeleton of |hostname| to |skeleton| -- confirm against the .cc file. |

Peter Kasting
2017/05/10 22:38:46
Nit: turns
jungshik at Google
2017/05/14 09:36:23
Done.
|
| + void SetAllowedUnicodeSet(UErrorCode* status); |
| + bool IsMadeOfLatinAlikeCyrillic(const icu::UnicodeString& label_string); |

Peter Kasting
2017/05/10 22:38:46
Nit: These other two functions don't have comments
jungshik at Google
2017/05/14 09:36:23
will add desc.
|
| + bool GetSkeleton(base::StringPiece16 hostname, std::string* skeleton); |
| + |
| + USpoofChecker* checker_; |
| + icu::UnicodeSet deviation_characters_; |
| + icu::UnicodeSet non_ascii_latin_letters_; |
| + icu::UnicodeSet kana_letters_exceptions_; |
| + icu::UnicodeSet combining_diacritics_exceptions_; |
| + icu::UnicodeSet cyrillic_letters_; |
| + icu::UnicodeSet cyrillic_letters_latin_alike_; |
| + icu::UnicodeSet lgc_letters_n_ascii_; |
| + std::unique_ptr<icu::Transliterator> transliterator_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker); |
| +}; |
| + |
| +} // namespace url_formatter |
| + |
| +#endif // COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ |