OLD | NEW |
---|---|
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ | 5 #ifndef COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ |
6 #define COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ | 6 #define COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ |
7 | 7 |
8 #include <memory> | 8 #include <memory> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/strings/string16.h" | 11 #include "base/strings/string16.h" |
12 #include "base/strings/string_piece.h" | 12 #include "base/strings/string_piece.h" |
13 #include "third_party/icu/source/common/unicode/uniset.h" | 13 #include "third_party/icu/source/common/unicode/uniset.h" |
14 #include "third_party/icu/source/common/unicode/utypes.h" | 14 #include "third_party/icu/source/common/unicode/utypes.h" |
15 #include "third_party/icu/source/common/unicode/uversion.h" | 15 #include "third_party/icu/source/common/unicode/uversion.h" |
16 | 16 |
17 // 'icu' does not work. Use U_ICU_NAMESPACE. | 17 // 'icu' does not work. Use U_ICU_NAMESPACE. |
18 namespace U_ICU_NAMESPACE { | 18 namespace U_ICU_NAMESPACE { |
19 | 19 |
20 class Transliterator; | |
20 class UnicodeString; | 21 class UnicodeString; |
21 | 22 |
22 } // namespace U_ICU_NAMESPACE | 23 } // namespace U_ICU_NAMESPACE |
23 | 24 |
24 struct USpoofChecker; | 25 struct USpoofChecker; |
25 | 26 |
26 namespace url_formatter { | 27 namespace url_formatter { |
27 | 28 |
28 // A helper class for IDN Spoof checking, used to ensure that no IDN input is | 29 // A helper class for IDN Spoof checking, used to ensure that no IDN input is |
29 // spoofable per Chromium's standard of spoofability. For a more thorough | 30 // spoofable per Chromium's standard of spoofability. For a more thorough |
30 // explanation of how spoof checking works in Chromium, see | 31 // explanation of how spoof checking works in Chromium, see |
31 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome . | 32 // http://dev.chromium.org/developers/design-documents/idn-in-google-chrome . |
32 | 33 |
33 class IDNSpoofChecker { | 34 class IDNSpoofChecker { |
34 public: | 35 public: |
35 IDNSpoofChecker(); | 36 IDNSpoofChecker(); |
36 ~IDNSpoofChecker(); | 37 ~IDNSpoofChecker(); |
37 | 38 |
38 // Returns true if |label| is safe to display as Unicode. In the event of | 39 // Returns true if |label| is safe to display as Unicode. In the event of |
39 // library failure, all IDN inputs will be treated as unsafe. | 40 // library failure, all IDN inputs will be treated as unsafe. |
40 // See the function body for details on the specific safety checks performed. | 41 // See the function body for details on the specific safety checks performed. |
41 bool SafeToDisplayAsUnicode(base::StringPiece16 label, bool is_tld_ascii); | 42 bool SafeToDisplayAsUnicode(base::StringPiece16 label, bool is_tld_ascii); |
42 | 43 |
44 // Returns true if |hostname| or the last few components of |hostname| look | |
45 // similar to one of the top domains listed in top_domains/alexa_domains.list. Two | |
46 // checks are done: | |
47 // 1. Calculate the skeleton of |hostname| based on the Unicode confusable | |
48 // character list and look it up in the pre-calculated skeleton list of | |
49 // top domains. | |
50 // 2. Look up the diacritic-free version of |hostname| in the list of | |
51 // top domains. Note that non-IDN hostnames will not get here. | |
52 bool SimilarToTopDomains(base::StringPiece16 hostname); | |
53 | |
43 private: | 54 private: |
44 // Sets allowed characters in IDN labels and turns on USPOOF_CHAR_LIMIT. | 55 // Sets allowed characters in IDN labels and turns on USPOOF_CHAR_LIMIT. |
45 void SetAllowedUnicodeSet(UErrorCode* status); | 56 void SetAllowedUnicodeSet(UErrorCode* status); |
46 // Returns true if all the Cyrillic letters in |label| belong to a set of | 57 // Returns true if all the Cyrillic letters in |label| belong to a set of |
Peter Kasting
2017/05/15 18:55:31
Nit: I would put a blank line above this comment a
jungshik at Google
2017/05/17 23:11:04
Done.
| |
47 // Cyrillic letters that look like ASCII Latin letters. | 58 // Cyrillic letters that look like ASCII Latin letters. |
48 bool IsMadeOfLatinAlikeCyrillic(const icu::UnicodeString& label); | 59 bool IsMadeOfLatinAlikeCyrillic(const icu::UnicodeString& label); |
60 // Calculates the confusability skeleton for |hostname| and stores it in | |
61 // |skeleton|. Returns true if the calculation succeeds. | |
Peter Kasting
2017/05/15 18:55:31
Nit: This comment sounds like the function just ch
jungshik at Google
2017/05/17 23:11:04
ooops. Yeah, it's gone a few PS's ago. Thank you f
| |
62 bool GetSkeleton(base::StringPiece16 hostname, std::string* skeleton); | |
49 | 63 |
50 USpoofChecker* checker_; | 64 USpoofChecker* checker_; |
51 icu::UnicodeSet deviation_characters_; | 65 icu::UnicodeSet deviation_characters_; |
52 icu::UnicodeSet non_ascii_latin_letters_; | 66 icu::UnicodeSet non_ascii_latin_letters_; |
53 icu::UnicodeSet kana_letters_exceptions_; | 67 icu::UnicodeSet kana_letters_exceptions_; |
68 icu::UnicodeSet combining_diacritics_exceptions_; | |
54 icu::UnicodeSet cyrillic_letters_; | 69 icu::UnicodeSet cyrillic_letters_; |
55 icu::UnicodeSet cyrillic_letters_latin_alike_; | 70 icu::UnicodeSet cyrillic_letters_latin_alike_; |
71 icu::UnicodeSet lgc_letters_n_ascii_; | |
72 std::unique_ptr<icu::Transliterator> transliterator_; | |
56 | 73 |
57 IDNSpoofChecker(const IDNSpoofChecker&) = delete; | 74 IDNSpoofChecker(const IDNSpoofChecker&) = delete; |
58 void operator=(const IDNSpoofChecker&) = delete; | 75 void operator=(const IDNSpoofChecker&) = delete; |
59 }; | 76 }; |
60 | 77 |
61 } // namespace url_formatter | 78 } // namespace url_formatter |
62 | 79 |
63 #endif // COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ | 80 #endif // COMPONENTS_URL_FORMATTER_IDN_SPOOF_CHECKER_H_ |
OLD | NEW |