Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // A helper class for IDN Spoof checking, used to ensure that no IDN input is | |
|
Peter Kasting
2017/05/10 22:38:46
Nit: Maybe this should be just above the class its
jungshik at Google
2017/05/14 09:36:23
Done.
| |
| 6 // spoofable per Chromium's standard of spoofability. For a more thorough | |
| 7 // explanation of how spoof checking works in Chromium, see | |
| 8 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome . | |
| 9 | |
| 10 #ifndef COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ | |
| 11 #define COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ | |
| 12 | |
| 13 #include <memory> | |
| 14 #include <string> | |
| 15 | |
| 16 #include "base/strings/string16.h" | |
| 17 #include "base/strings/string_piece.h" | |
| 18 #include "third_party/icu/source/common/unicode/uniset.h" | |
| 19 #include "third_party/icu/source/common/unicode/unistr.h" | |
| 20 #include "third_party/icu/source/common/unicode/utypes.h" | |
| 21 #include "third_party/icu/source/i18n/unicode/regex.h" | |
| 22 #include "third_party/icu/source/i18n/unicode/translit.h" | |
| 23 #include "third_party/icu/source/i18n/unicode/uspoof.h" | |
|
Peter Kasting
2017/05/10 22:38:46
Nit: Seems like most of these #includes are not ne
jungshik at Google
2017/05/14 09:36:23
Actually, I tried forward declarations before fall
| |
| 24 | |
| 25 namespace url_formatter { | |
| 26 | |
| 27 class IDNSpoofChecker { | |
| 28 public: | |
| 29 IDNSpoofChecker(); | |
| 30 ~IDNSpoofChecker(); | |
| 31 | |
| 32 // Returns true if |label| is safe to display as Unicode. In the event of | |
| 33 // library failure, all IDN inputs will be treated as unsafe. | |
| 34 // See the function body for details on the specific safety checks performed. | |
| 35 bool SafeToDisplayAsUnicode(base::StringPiece16 label, bool is_tld_ascii); | |
| 36 | |
| 37 // Returns true if |hostname| or the last few components of |hostname| looks | |
| 38 // similar to one of the top domains listed in top_domains/alexa_domains.list. Two | |
| 39 // checks are done: | |
| 40 // 1. Calculate the skeleton of |hostname| based on the Unicode confusable | |
| 41 // character list and look it up in the pre-calculated skeleton list of | |
| 42 // top domains. | |
| 43 // 2. Look up the diacritic-free version of |hostname| in the list of | |
| 44 // top domains. Note that non-IDN hostnames will not get here. | |
| 45 bool SimilarToTopDomains(base::StringPiece16 hostname); | |
| 46 | |
| 47 private: | |
| 48 // Sets allowed characters in IDN labels and turns on USPOOF_CHAR_LIMIT. | |
|
Peter Kasting
2017/05/10 22:38:46
Nit: turns
jungshik at Google
2017/05/14 09:36:23
Done.
| |
| 49 void SetAllowedUnicodeSet(UErrorCode* status); | |
| 50 bool IsMadeOfLatinAlikeCyrillic(const icu::UnicodeString& label_string); | |
|
Peter Kasting
2017/05/10 22:38:46
Nit: These other two functions don't have comments
jungshik at Google
2017/05/14 09:36:23
will add desc.
| |
| 51 bool GetSkeleton(base::StringPiece16 hostname, std::string* skeleton); | |
| 52 | |
| 53 USpoofChecker* checker_; | |
| 54 icu::UnicodeSet deviation_characters_; | |
| 55 icu::UnicodeSet non_ascii_latin_letters_; | |
| 56 icu::UnicodeSet kana_letters_exceptions_; | |
| 57 icu::UnicodeSet combining_diacritics_exceptions_; | |
| 58 icu::UnicodeSet cyrillic_letters_; | |
| 59 icu::UnicodeSet cyrillic_letters_latin_alike_; | |
| 60 icu::UnicodeSet lgc_letters_n_ascii_; | |
| 61 std::unique_ptr<icu::Transliterator> transliterator_; | |
| 62 | |
| 63 DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker); | |
| 64 }; | |
| 65 | |
| 66 } // namespace url_formatter | |
| 67 | |
| 68 #endif // COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ | |
| OLD | NEW |