OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/autofill/core/browser/autofill_data_util.h" | 5 #include "components/autofill/core/browser/autofill_data_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/i18n/char_iterator.h" | 10 #include "base/i18n/char_iterator.h" |
| 11 #include "base/strings/string_piece.h" |
11 #include "base/strings/string_split.h" | 12 #include "base/strings/string_split.h" |
12 #include "base/strings/string_util.h" | 13 #include "base/strings/string_util.h" |
13 #include "base/strings/utf_string_conversions.h" | 14 #include "base/strings/utf_string_conversions.h" |
14 #include "components/autofill/core/browser/field_types.h" | 15 #include "components/autofill/core/browser/field_types.h" |
15 #include "components/grit/components_scaled_resources.h" | 16 #include "components/grit/components_scaled_resources.h" |
16 #include "components/strings/grit/components_strings.h" | 17 #include "components/strings/grit/components_strings.h" |
17 #include "third_party/icu/source/common/unicode/uscript.h" | 18 #include "third_party/icu/source/common/unicode/uscript.h" |
18 #include "third_party/re2/src/re2/re2.h" | 19 #include "third_party/re2/src/re2/re2.h" |
19 | 20 |
20 namespace autofill { | 21 namespace autofill { |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
59 // The common and non-ambiguous CJK surnames (last names) that have more than | 60 // The common and non-ambiguous CJK surnames (last names) that have more than |
60 // one character. | 61 // one character. |
61 const char* common_cjk_multi_char_surnames[] = { | 62 const char* common_cjk_multi_char_surnames[] = { |
62 // Korean, taken from the list of surnames: | 63 // Korean, taken from the list of surnames: |
63 // https://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EC%84%B1%EC%94%A
8_%EB%AA%A9%EB%A1%9D | 64 // https://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EC%84%B1%EC%94%A
8_%EB%AA%A9%EB%A1%9D |
64 "남궁", "사공", "서문", "선우", "제갈", "황보", "독고", "망절", | 65 "남궁", "사공", "서문", "선우", "제갈", "황보", "독고", "망절", |
65 | 66 |
66 // Chinese, taken from the top 10 Chinese 2-character surnames: | 67 // Chinese, taken from the top 10 Chinese 2-character surnames: |
67 // https://zh.wikipedia.org/wiki/%E8%A4%87%E5%A7%93#.E5.B8.B8.E8.A6.8B.E7.9A.8
4.E8.A4.87.E5.A7.93 | 68 // https://zh.wikipedia.org/wiki/%E8%A4%87%E5%A7%93#.E5.B8.B8.E8.A6.8B.E7.9A.8
4.E8.A4.87.E5.A7.93 |
68 // Simplified Chinese (mostly mainland China) | 69 // Simplified Chinese (mostly mainland China) |
69 "欧阳", "令狐", "皇甫", "上官", "司徒", "诸葛", "司马", "宇文", "呼延", "端木", | 70 "欧阳", "令狐", "皇甫", "上官", "司徒", "诸葛", "司马", "宇文", "呼延", |
| 71 "端木", |
70 // Traditional Chinese (mostly Taiwan) | 72 // Traditional Chinese (mostly Taiwan) |
71 "張簡", "歐陽", "諸葛", "申屠", "尉遲", "司馬", "軒轅", "夏侯" | 73 "張簡", "歐陽", "諸葛", "申屠", "尉遲", "司馬", "軒轅", "夏侯"}; |
72 }; | |
73 | 74 |
74 // All Korean surnames that have more than one character, even the | 75 // All Korean surnames that have more than one character, even the |
75 // rare/ambiguous ones. | 76 // rare/ambiguous ones. |
76 const char* korean_multi_char_surnames[] = { | 77 const char* korean_multi_char_surnames[] = { |
77 "강전", "남궁", "독고", "동방", "망절", "사공", "서문", "선우", | 78 "강전", "남궁", "독고", "동방", "망절", "사공", "서문", |
78 "소봉", "어금", "장곡", "제갈", "황목", "황보" | 79 "선우", "소봉", "어금", "장곡", "제갈", "황목", "황보"}; |
79 }; | |
80 | 80 |
81 // Returns true if |set| contains |element|, modulo a final period. | 81 // Returns true if |set| contains |element|, modulo a final period. |
82 bool ContainsString(const char* const set[], | 82 bool ContainsString(const char* const set[], |
83 size_t set_size, | 83 size_t set_size, |
84 base::StringPiece16 element) { | 84 base::StringPiece16 element) { |
85 if (!base::IsStringASCII(element)) | 85 if (!base::IsStringASCII(element)) |
86 return false; | 86 return false; |
87 | 87 |
88 base::StringPiece16 trimmed_element = | 88 base::StringPiece16 trimmed_element = |
89 base::TrimString(element, base::ASCIIToUTF16("."), base::TRIM_ALL); | 89 base::TrimString(element, base::ASCIIToUTF16("."), base::TRIM_ALL); |
(...skipping 27 matching lines...) Expand all Loading... |
117 name_tokens->back())) { | 117 name_tokens->back())) { |
118 break; | 118 break; |
119 } | 119 } |
120 name_tokens->pop_back(); | 120 name_tokens->pop_back(); |
121 } | 121 } |
122 } | 122 } |
123 | 123 |
124 // Find whether |name| starts with any of the strings from the array | 124 // Find whether |name| starts with any of the strings from the array |
125 // |prefixes|. The returned value is the length of the prefix found, or 0 if | 125 // |prefixes|. The returned value is the length of the prefix found, or 0 if |
126 // none is found. | 126 // none is found. |
127 size_t StartsWithAny(base::StringPiece16 name, const char** prefixes, | 127 size_t StartsWithAny(base::StringPiece16 name, |
| 128 const char** prefixes, |
128 size_t prefix_count) { | 129 size_t prefix_count) { |
129 base::string16 buffer; | 130 base::string16 buffer; |
130 for (size_t i = 0; i < prefix_count; i++) { | 131 for (size_t i = 0; i < prefix_count; i++) { |
131 buffer.clear(); | 132 buffer.clear(); |
132 base::UTF8ToUTF16(prefixes[i], strlen(prefixes[i]), &buffer); | 133 base::UTF8ToUTF16(prefixes[i], strlen(prefixes[i]), &buffer); |
133 if (base::StartsWith(name, buffer, base::CompareCase::SENSITIVE)) { | 134 if (base::StartsWith(name, buffer, base::CompareCase::SENSITIVE)) { |
134 return buffer.size(); | 135 return buffer.size(); |
135 } | 136 } |
136 } | 137 } |
137 return 0; | 138 return 0; |
138 } | 139 } |
139 | 140 |
140 // Returns true if |c| is a CJK (Chinese, Japanese, Korean) character, for any | 141 // Returns true if |c| is a CJK (Chinese, Japanese, Korean) character, for any |
141 // of the CJK alphabets. | 142 // of the CJK alphabets. |
142 bool IsCJKCharacter(UChar32 c) { | 143 bool IsCJKCharacter(UChar32 c) { |
143 UErrorCode error = U_ZERO_ERROR; | 144 UErrorCode error = U_ZERO_ERROR; |
144 switch (uscript_getScript(c, &error)) { | 145 switch (uscript_getScript(c, &error)) { |
145 case USCRIPT_HAN: // CJK logographs, used by all 3 (but rarely for Korean) | 146 case USCRIPT_HAN: // CJK logographs, used by all 3 (but rarely for Korean) |
146 case USCRIPT_HANGUL: // Korean alphabet | 147 case USCRIPT_HANGUL: // Korean alphabet |
147 case USCRIPT_KATAKANA: // A Japanese syllabary | 148 case USCRIPT_KATAKANA: // A Japanese syllabary |
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
255 if (is_cjk && !previous_was_cjk) { | 256 if (is_cjk && !previous_was_cjk) { |
256 word_count++; | 257 word_count++; |
257 } | 258 } |
258 previous_was_cjk = is_cjk; | 259 previous_was_cjk = is_cjk; |
259 } | 260 } |
260 return word_count > 0 && word_count < 3; | 261 return word_count > 0 && word_count < 3; |
261 } | 262 } |
262 | 263 |
263 NameParts SplitName(base::StringPiece16 name) { | 264 NameParts SplitName(base::StringPiece16 name) { |
264 static const base::char16 kWordSeparators[] = { | 265 static const base::char16 kWordSeparators[] = { |
265 u' ', // ASCII space. | 266 u' ', // ASCII space. |
266 u',', // ASCII comma. | 267 u',', // ASCII comma. |
267 u'\u3000', // 'IDEOGRAPHIC SPACE' (U+3000). | 268 u'\u3000', // 'IDEOGRAPHIC SPACE' (U+3000). |
268 u'\u30FB', // 'KATAKANA MIDDLE DOT' (U+30FB). | 269 u'\u30FB', // 'KATAKANA MIDDLE DOT' (U+30FB). |
269 u'\u00B7', // 'MIDDLE DOT' (U+00B7). | 270 u'\u00B7', // 'MIDDLE DOT' (U+00B7). |
270 u'\0' // End of string. | 271 u'\0' // End of string. |
271 }; | 272 }; |
272 std::vector<base::StringPiece16> name_tokens = base::SplitStringPiece( | 273 std::vector<base::StringPiece16> name_tokens = base::SplitStringPiece( |
273 name, kWordSeparators, base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 274 name, kWordSeparators, base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
274 StripPrefixes(&name_tokens); | 275 StripPrefixes(&name_tokens); |
275 | 276 |
276 NameParts parts; | 277 NameParts parts; |
277 | 278 |
278 // TODO(crbug.com/89111): Hungarian, Tamil, Telugu, and Vietnamese also have | 279 // TODO(crbug.com/89111): Hungarian, Tamil, Telugu, and Vietnamese also have |
279 // the given name before the surname, and should be treated as special cases | 280 // the given name before the surname, and should be treated as special cases |
280 // too. | 281 // too. |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
399 // LastFirst | 400 // LastFirst |
400 candidate = profile.GetRawInfo(autofill::NAME_LAST) + | 401 candidate = profile.GetRawInfo(autofill::NAME_LAST) + |
401 profile.GetRawInfo(autofill::NAME_FIRST); | 402 profile.GetRawInfo(autofill::NAME_FIRST); |
402 if (!full_name.compare(candidate)) { | 403 if (!full_name.compare(candidate)) { |
403 return true; | 404 return true; |
404 } | 405 } |
405 | 406 |
406 return false; | 407 return false; |
407 } | 408 } |
408 | 409 |
409 const PaymentRequestData& GetPaymentRequestData(const std::string& type) { | 410 const PaymentRequestData& GetPaymentRequestData( |
| 411 const std::string& issuer_network) { |
410 for (const PaymentRequestData& data : kPaymentRequestData) { | 412 for (const PaymentRequestData& data : kPaymentRequestData) { |
411 if (type == data.card_type) | 413 if (issuer_network == data.issuer_network) |
412 return data; | 414 return data; |
413 } | 415 } |
414 return kGenericPaymentRequestData; | 416 return kGenericPaymentRequestData; |
415 } | 417 } |
416 | 418 |
417 const char* GetCardTypeForBasicCardPaymentType( | 419 const char* GetIssuerNetworkForBasicCardIssuerNetwork( |
418 const std::string& basic_card_payment_type) { | 420 const std::string& basic_card_issuer_network) { |
419 for (const PaymentRequestData& data : kPaymentRequestData) { | 421 for (const PaymentRequestData& data : kPaymentRequestData) { |
420 if (basic_card_payment_type == data.basic_card_payment_type) { | 422 if (basic_card_issuer_network == data.basic_card_issuer_network) { |
421 return data.card_type; | 423 return data.issuer_network; |
422 } | 424 } |
423 } | 425 } |
424 return kGenericPaymentRequestData.card_type; | 426 return kGenericPaymentRequestData.issuer_network; |
425 } | 427 } |
426 | 428 |
427 bool IsValidCountryCode(const std::string& country_code) { | 429 bool IsValidCountryCode(const std::string& country_code) { |
428 if (country_code.size() != 2) | 430 if (country_code.size() != 2) |
429 return false; | 431 return false; |
430 | 432 |
431 return re2::RE2::FullMatch(country_code, "^[A-Z]{2}$"); | 433 return re2::RE2::FullMatch(country_code, "^[A-Z]{2}$"); |
432 } | 434 } |
433 | 435 |
434 bool IsValidCountryCode(const base::string16& country_code) { | 436 bool IsValidCountryCode(const base::string16& country_code) { |
435 return IsValidCountryCode(base::UTF16ToUTF8(country_code)); | 437 return IsValidCountryCode(base::UTF16ToUTF8(country_code)); |
436 } | 438 } |
437 | 439 |
438 } // namespace data_util | 440 } // namespace data_util |
439 } // namespace autofill | 441 } // namespace autofill |
OLD | NEW |