Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/autofill/core/browser/autofill_data_util.h" | 5 #include "components/autofill/core/browser/autofill_data_util.h" |
| 6 | 6 |
| 7 #include "base/strings/utf_string_conversions.h" | 7 #include "base/strings/utf_string_conversions.h" |
| 8 #include "components/autofill/core/browser/autofill_test_utils.h" | 8 #include "components/autofill/core/browser/autofill_test_utils.h" |
| 9 #include "testing/gtest/include/gtest/gtest.h" | 9 #include "testing/gtest/include/gtest/gtest.h" |
| 10 | 10 |
| 11 namespace autofill { | 11 namespace autofill { |
| 12 namespace data_util { | 12 namespace data_util { |
| 13 | 13 |
| 14 TEST(AutofillDataUtilTest, IsCJKName) { | 14 struct IsCJKNameTestCase { |
| 15 typedef struct { | 15 const char* full_name; |
| 16 const char* full_name; | 16 bool is_cjk; |
| 17 bool is_cjk; | 17 }; |
| 18 } TestCase; | |
| 19 | 18 |
| 20 TestCase test_cases[] = { | 19 class IsCJKNameTest : public testing::TestWithParam<IsCJKNameTestCase> {}; |
| 21 // Non-CJK language with only ASCII characters. | |
| 22 {"Homer Jay Simpson", false}, | |
| 23 // Non-CJK language with some ASCII characters. | |
| 24 {"Éloïse Paré", false}, | |
| 25 // Non-CJK language with no ASCII characters. | |
| 26 {"Σωκράτης", false}, | |
| 27 | 20 |
| 28 // (Simplified) Chinese name, Unihan. | 21 TEST_P(IsCJKNameTest, IsCJKName) { |
| 29 {"刘翔", true}, | 22 auto test_case = GetParam(); |
| 30 // (Simplified) Chinese name, Unihan, with an ASCII space. | 23 EXPECT_EQ(test_case.is_cjk, IsCJKName(base::UTF8ToUTF16(test_case.full_name))) |
| 31 {"成 龙", true}, | 24 << "Failed for: " << test_case.full_name; |
| 32 // Korean name, Hangul. | |
| 33 {"송지효", true}, | |
| 34 // Korean name, Hangul, with an 'IDEOGRAPHIC SPACE' (U+3000). | |
| 35 {"김 종국", true}, | |
| 36 // Japanese name, Unihan. | |
| 37 {"山田貴洋", true}, | |
| 38 // Japanese name, Katakana, with a 'KATAKANA MIDDLE DOT' (U+30FB). | |
| 39 {"ビル・ゲイツ", true}, | |
| 40 // Japanese name, Katakana, with a 'MIDDLE DOT' (U+00B7) (likely a typo). | |
| 41 {"ビル·ゲイツ", true}, | |
| 42 | |
| 43 // CJK names don't have a middle name, so a 3-part name is bogus to us. | |
| 44 {"반 기 문", false} | |
| 45 }; | |
| 46 | |
| 47 for (const TestCase& test_case : test_cases) { | |
| 48 EXPECT_EQ(test_case.is_cjk, | |
| 49 IsCJKName(base::UTF8ToUTF16(test_case.full_name))) | |
| 50 << "Failed for: " << test_case.full_name; | |
| 51 } | |
| 52 } | 25 } |
| 53 | 26 |
| 54 TEST(AutofillDataUtilTest, SplitName) { | 27 INSTANTIATE_TEST_CASE_P( |
| 55 typedef struct { | 28 AutofillDataUtil, |
|
sebsg
2017/03/13 14:56:10
AutofillDataUtil -> AutofillDataUtilTest
Thanks!
wuandy
2017/03/14 19:05:36
Done.
| |
| 56 std::string full_name; | 29 IsCJKNameTest, |
| 57 std::string given_name; | 30 testing::Values( |
| 58 std::string middle_name; | 31 // Non-CJK language with only ASCII characters. |
| 59 std::string family_name; | 32 IsCJKNameTestCase{"Homer Jay Simpson", false}, |
| 33 // Non-CJK language with some ASCII characters. | |
| 34 IsCJKNameTestCase{"Éloïse Paré", false}, | |
| 35 // Non-CJK language with no ASCII characters. | |
| 36 IsCJKNameTestCase{"Σωκράτης", false}, | |
| 60 | 37 |
| 61 } TestCase; | 38 // (Simplified) Chinese name, Unihan. |
| 39 IsCJKNameTestCase{"刘翔", true}, | |
| 40 // (Simplified) Chinese name, Unihan, with an ASCII space. | |
| 41 IsCJKNameTestCase{"成 龙", true}, | |
| 42 // Korean name, Hangul. | |
| 43 IsCJKNameTestCase{"송지효", true}, | |
| 44 // Korean name, Hangul, with an 'IDEOGRAPHIC SPACE' (U+3000). | |
| 45 IsCJKNameTestCase{"김 종국", true}, | |
| 46 // Japanese name, Unihan. | |
| 47 IsCJKNameTestCase{"山田貴洋", true}, | |
| 48 // Japanese name, Katakana, with a 'KATAKANA MIDDLE DOT' (U+30FB). | |
| 49 IsCJKNameTestCase{"ビル・ゲイツ", true}, | |
| 50 // Japanese name, Katakana, with a 'MIDDLE DOT' (U+00B7) (likely a | |
| 51 // typo). | |
| 52 IsCJKNameTestCase{"ビル·ゲイツ", true}, | |
| 62 | 53 |
| 63 const TestCase test_cases[] = { | 54 // CJK names don't have a middle name, so a 3-part name is bogus to us. |
| 64 // Full name including given, middle and family names. | 55 IsCJKNameTestCase{"반 기 문", false})); |
| 65 {"Homer Jay Simpson", "Homer", "Jay", "Simpson"}, | |
| 66 // No middle name. | |
| 67 {"Moe Szyslak", "Moe", "", "Szyslak"}, | |
| 68 // Common name prefixes removed. | |
| 69 {"Reverend Timothy Lovejoy", "Timothy", "", "Lovejoy"}, | |
| 70 // Common name suffixes removed. | |
| 71 {"John Frink Phd", "John", "", "Frink"}, | |
| 72 // Exception to the name suffix removal. | |
| 73 {"John Ma", "John", "", "Ma"}, | |
| 74 // Common family name prefixes not considered a middle name. | |
| 75 {"Milhouse Van Houten", "Milhouse", "", "Van Houten"}, | |
| 76 | 56 |
| 77 // CJK names have reverse order (surname goes first, given name goes | 57 struct FullNameTestCase { |
| 78 // second). | 58 std::string full_name; |
| 79 {"孫 德明", "德明", "", "孫"}, // Chinese name, Unihan | 59 std::string given_name; |
| 80 {"孫 德明", "德明", "", "孫"}, // Chinese name, Unihan, 'IDEOGRAPHIC SPACE' | 60 std::string middle_name; |
| 81 {"홍 길동", "길동", "", "홍"}, // Korean name, Hangul | 61 std::string family_name; |
| 82 {"山田 貴洋", "貴洋", "", "山田"}, // Japanese name, Unihan | 62 }; |
| 83 | 63 |
| 84 // In Japanese, foreign names use 'KATAKANA MIDDLE DOT' (U+30FB) as a | 64 class SplitNameTest : public testing::TestWithParam<FullNameTestCase> {}; |
| 85 // separator. There is no consensus for the ordering. For now, we use the | |
| 86 // same ordering as regular Japanese names ("last・first"). | |
| 87 {"ゲイツ・ビル", "ビル", "", "ゲイツ"}, // Foreign name in Japanese, Katakana | |
| 88 // 'KATAKANA MIDDLE DOT' is occasionally typoed as 'MIDDLE DOT' (U+00B7). | |
| 89 {"ゲイツ·ビル", "ビル", "", "ゲイツ"}, // Foreign name in Japanese, Katakana | |
| 90 | 65 |
| 91 // CJK names don't usually have a space in the middle, but most of the | 66 TEST_P(SplitNameTest, SplitName) { |
| 92 // time, the surname is only one character (in Chinese & Korean). | 67 auto test_case = GetParam(); |
| 93 {"최성훈", "성훈", "", "최"}, // Korean name, Hangul | 68 NameParts name_parts = SplitName(base::UTF8ToUTF16(test_case.full_name)); |
| 94 {"刘翔", "翔", "", "刘"}, // (Simplified) Chinese name, Unihan | |
| 95 {"劉翔", "翔", "", "劉"}, // (Traditional) Chinese name, Unihan | |
| 96 | 69 |
| 97 // There are a few exceptions. Occasionally, the surname has two | 70 EXPECT_EQ(base::UTF8ToUTF16(test_case.given_name), name_parts.given); |
| 98 // characters. | 71 EXPECT_EQ(base::UTF8ToUTF16(test_case.middle_name), name_parts.middle); |
| 99 {"남궁도", "도", "", "남궁"}, // Korean name, Hangul | 72 EXPECT_EQ(base::UTF8ToUTF16(test_case.family_name), name_parts.family); |
| 100 {"황보혜정", "혜정", "", "황보"}, // Korean name, Hangul | |
| 101 {"歐陽靖", "靖", "", "歐陽"}, // (Traditional) Chinese name, Unihan | |
| 102 | |
| 103 // In Korean, some 2-character surnames are rare/ambiguous, like "강전": | |
| 104 // "강" is a common surname, and "전" can be part of a given name. In | |
| 105 // those cases, we assume it's 1/2 for 3-character names, or 2/2 for | |
| 106 // 4-character names. | |
| 107 {"강전희", "전희", "", "강"}, // Korean name, Hangul | |
| 108 {"황목치승", "치승", "", "황목"}, // Korean name, Hangul | |
| 109 | |
| 110 // It occasionally happens that a full name is 2 characters, 1/1. | |
| 111 {"이도", "도", "", "이"}, // Korean name, Hangul | |
| 112 {"孫文", "文", "", "孫"} // Chinese name, Unihan | |
| 113 }; | |
| 114 | |
| 115 for (TestCase test_case : test_cases) { | |
| 116 NameParts name_parts = SplitName(base::UTF8ToUTF16(test_case.full_name)); | |
| 117 | |
| 118 EXPECT_EQ(base::UTF8ToUTF16(test_case.given_name), name_parts.given); | |
| 119 EXPECT_EQ(base::UTF8ToUTF16(test_case.middle_name), name_parts.middle); | |
| 120 EXPECT_EQ(base::UTF8ToUTF16(test_case.family_name), name_parts.family); | |
| 121 } | |
| 122 } | 73 } |
| 123 | 74 |
| 124 TEST(AutofillDataUtilTest, JoinNameParts) { | 75 INSTANTIATE_TEST_CASE_P( |
| 125 typedef struct { | 76 AutofillDataUtil, |
| 126 std::string given_name; | 77 SplitNameTest, |
| 127 std::string middle_name; | 78 testing::Values( |
| 128 std::string family_name; | 79 // Full name including given, middle and family names. |
| 129 std::string full_name; | 80 FullNameTestCase{"Homer Jay Simpson", "Homer", "Jay", "Simpson"}, |
| 130 } TestCase; | 81 // No middle name. |
| 82 FullNameTestCase{"Moe Szyslak", "Moe", "", "Szyslak"}, | |
| 83 // Common name prefixes removed. | |
| 84 FullNameTestCase{"Reverend Timothy Lovejoy", "Timothy", "", "Lovejoy"}, | |
| 85 // Common name suffixes removed. | |
| 86 FullNameTestCase{"John Frink Phd", "John", "", "Frink"}, | |
| 87 // Exception to the name suffix removal. | |
| 88 FullNameTestCase{"John Ma", "John", "", "Ma"}, | |
| 89 // Common family name prefixes not considered a middle name. | |
| 90 FullNameTestCase{"Milhouse Van Houten", "Milhouse", "", "Van Houten"}, | |
| 131 | 91 |
| 132 TestCase test_cases[] = { | 92 // CJK names have reverse order (surname goes first, given name goes |
| 133 // Full name including given, middle and family names. | 93 // second). |
| 134 {"Homer", "Jay", "Simpson", "Homer Jay Simpson"}, | 94 FullNameTestCase{"孫 德明", "德明", "", "孫"}, // Chinese name, Unihan |
| 135 // No middle name. | 95 FullNameTestCase{"孫 德明", "德明", "", |
| 136 {"Moe", "", "Szyslak", "Moe Szyslak"}, | 96 "孫"}, // Chinese name, Unihan, 'IDEOGRAPHIC SPACE' |
| 97 FullNameTestCase{"홍 길동", "길동", "", "홍"}, // Korean name, Hangul | |
| 98 FullNameTestCase{"山田 貴洋", "貴洋", "", | |
| 99 "山田"}, // Japanese name, Unihan | |
| 137 | 100 |
| 138 // CJK names have reversed order, no space. | 101 // In Japanese, foreign names use 'KATAKANA MIDDLE DOT' (U+30FB) as a |
| 139 {"德明", "", "孫", "孫德明"}, // Chinese name, Unihan | 102 // separator. There is no consensus for the ordering. For now, we use |
| 140 {"길동", "", "홍", "홍길동"}, // Korean name, Hangul | 103 // the same ordering as regular Japanese names ("last・first"). |
| 141 {"貴洋", "", "山田", "山田貴洋"}, // Japanese name, Unihan | 104 FullNameTestCase{"ゲイツ・ビル", "ビル", "", |
| 105 "ゲイツ"}, // Foreign name in Japanese, Katakana | |
| 106 // 'KATAKANA MIDDLE DOT' is occasionally typoed as 'MIDDLE DOT' | |
| 107 // (U+00B7). | |
| 108 FullNameTestCase{"ゲイツ·ビル", "ビル", "", | |
| 109 "ゲイツ"}, // Foreign name in Japanese, Katakana | |
| 142 | 110 |
| 143 // These are no CJK names for us, they're just bogus. | 111 // CJK names don't usually have a space in the middle, but most of the |
| 144 {"Homer", "", "シンプソン", "Homer シンプソン"}, | 112 // time, the surname is only one character (in Chinese & Korean). |
| 145 {"ホーマー", "", "Simpson", "ホーマー Simpson"}, | 113 FullNameTestCase{"최성훈", "성훈", "", "최"}, // Korean name, Hangul |
| 146 {"반", "기", "문", "반 기 문"} // Has a middle-name, too unusual | 114 FullNameTestCase{"刘翔", "翔", "", |
| 147 }; | 115 "刘"}, // (Simplified) Chinese name, Unihan |
| 116 FullNameTestCase{"劉翔", "翔", "", | |
| 117 "劉"}, // (Traditional) Chinese name, Unihan | |
| 148 | 118 |
| 149 for (const TestCase& test_case : test_cases) { | 119 // There are a few exceptions. Occasionally, the surname has two |
| 150 base::string16 joined = JoinNameParts( | 120 // characters. |
| 151 base::UTF8ToUTF16(test_case.given_name), | 121 FullNameTestCase{"남궁도", "도", "", "남궁"}, // Korean name, Hangul |
| 152 base::UTF8ToUTF16(test_case.middle_name), | 122 FullNameTestCase{"황보혜정", "혜정", "", |
| 153 base::UTF8ToUTF16(test_case.family_name)); | 123 "황보"}, // Korean name, Hangul |
| 124 FullNameTestCase{"歐陽靖", "靖", "", | |
| 125 "歐陽"}, // (Traditional) Chinese name, Unihan | |
| 154 | 126 |
| 155 EXPECT_EQ(base::UTF8ToUTF16(test_case.full_name), joined); | 127 // In Korean, some 2-character surnames are rare/ambiguous, like "강전": |
| 156 } | 128 // "강" is a common surname, and "전" can be part of a given name. In |
| 129 // those cases, we assume it's 1/2 for 3-character names, or 2/2 for | |
| 130 // 4-character names. | |
| 131 FullNameTestCase{"강전희", "전희", "", "강"}, // Korean name, Hangul | |
| 132 FullNameTestCase{"황목치승", "치승", "", | |
| 133 "황목"}, // Korean name, Hangul | |
| 134 | |
| 135 // It occasionally happens that a full name is 2 characters, 1/1. | |
| 136 FullNameTestCase{"이도", "도", "", "이"}, // Korean name, Hangul | |
| 137 FullNameTestCase{"孫文", "文", "", "孫"} // Chinese name, Unihan | |
| 138 )); | |
| 139 | |
| 140 class JoinNamePartsTest : public testing::TestWithParam<FullNameTestCase> {}; | |
| 141 | |
| 142 TEST_P(JoinNamePartsTest, JoinNameParts) { | |
| 143 auto test_case = GetParam(); | |
| 144 base::string16 joined = | |
| 145 JoinNameParts(base::UTF8ToUTF16(test_case.given_name), | |
| 146 base::UTF8ToUTF16(test_case.middle_name), | |
| 147 base::UTF8ToUTF16(test_case.family_name)); | |
| 148 | |
| 149 EXPECT_EQ(base::UTF8ToUTF16(test_case.full_name), joined); | |
| 157 } | 150 } |
| 158 | 151 |
| 152 INSTANTIATE_TEST_CASE_P( | |
| 153 AutofillDataUtil, | |
| 154 JoinNamePartsTest, | |
| 155 testing::Values( | |
| 156 // Full name including given, middle and family names. | |
| 157 FullNameTestCase{"Homer Jay Simpson", "Homer", "Jay", "Simpson"}, | |
| 158 // No middle name. | |
| 159 FullNameTestCase{"Moe Szyslak", "Moe", "", "Szyslak"}, | |
| 160 | |
| 161 // CJK names have reversed order, no space. | |
| 162 FullNameTestCase{"孫德明", "德明", "", "孫"}, // Chinese name, Unihan | |
| 163 FullNameTestCase{"홍길동", "길동", "", "홍"}, // Korean name, Hangul | |
| 164 FullNameTestCase{"山田貴洋", "貴洋", "", | |
| 165 "山田"}, // Japanese name, Unihan | |
| 166 | |
| 167 // These are no CJK names for us, they're just bogus. | |
| 168 FullNameTestCase{"Homer シンプソン", "Homer", "", "シンプソン"}, | |
| 169 FullNameTestCase{"ホーマー Simpson", "ホーマー", "", "Simpson"}, | |
| 170 FullNameTestCase{"반 기 문", "반", "기", "문"} | |
| 171 // Has a middle-name, too unusual | |
| 172 )); | |
| 173 | |
| 159 TEST(AutofillDataUtilTest, ProfileMatchesFullName) { | 174 TEST(AutofillDataUtilTest, ProfileMatchesFullName) { |
| 160 autofill::AutofillProfile profile; | 175 autofill::AutofillProfile profile; |
| 161 autofill::test::SetProfileInfo( | 176 autofill::test::SetProfileInfo( |
| 162 &profile, "First", "Middle", "Last", "fml@example.com", "Acme inc", | 177 &profile, "First", "Middle", "Last", "fml@example.com", "Acme inc", |
| 163 "123 Main", "Apt 2", "Laredo", "TX", "77300", "US", "832-555-1000"); | 178 "123 Main", "Apt 2", "Laredo", "TX", "77300", "US", "832-555-1000"); |
| 164 | 179 |
| 165 EXPECT_TRUE(ProfileMatchesFullName(base::UTF8ToUTF16("First Last"), profile)); | 180 EXPECT_TRUE(ProfileMatchesFullName(base::UTF8ToUTF16("First Last"), profile)); |
| 166 | 181 |
| 167 EXPECT_TRUE( | 182 EXPECT_TRUE( |
| 168 ProfileMatchesFullName(base::UTF8ToUTF16("First Middle Last"), profile)); | 183 ProfileMatchesFullName(base::UTF8ToUTF16("First Middle Last"), profile)); |
| 169 | 184 |
| 170 EXPECT_TRUE( | 185 EXPECT_TRUE( |
| 171 ProfileMatchesFullName(base::UTF8ToUTF16("First M Last"), profile)); | 186 ProfileMatchesFullName(base::UTF8ToUTF16("First M Last"), profile)); |
| 172 | 187 |
| 173 EXPECT_TRUE( | 188 EXPECT_TRUE( |
| 174 ProfileMatchesFullName(base::UTF8ToUTF16("First M. Last"), profile)); | 189 ProfileMatchesFullName(base::UTF8ToUTF16("First M. Last"), profile)); |
| 175 | 190 |
| 176 EXPECT_TRUE( | 191 EXPECT_TRUE( |
| 177 ProfileMatchesFullName(base::UTF8ToUTF16("Last First"), profile)); | 192 ProfileMatchesFullName(base::UTF8ToUTF16("Last First"), profile)); |
| 178 | 193 |
| 179 EXPECT_TRUE( | 194 EXPECT_TRUE( |
| 180 ProfileMatchesFullName(base::UTF8ToUTF16("LastFirst"), profile)); | 195 ProfileMatchesFullName(base::UTF8ToUTF16("LastFirst"), profile)); |
| 181 | 196 |
| 182 EXPECT_FALSE( | 197 EXPECT_FALSE( |
| 183 ProfileMatchesFullName(base::UTF8ToUTF16("Kirby Puckett"), profile)); | 198 ProfileMatchesFullName(base::UTF8ToUTF16("Kirby Puckett"), profile)); |
| 184 } | 199 } |
| 185 | 200 |
| 186 } // namespace data_util | 201 } // namespace data_util |
| 187 } // namespace autofill | 202 } // namespace autofill |
| OLD | NEW |