OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/autofill/core/browser/autofill_data_util.h" |
| 6 |
| 7 #include <vector> |
| 8 |
| 9 #include "base/strings/string_split.h" |
| 10 #include "base/strings/string_util.h" |
| 11 #include "base/strings/utf_string_conversions.h" |
| 12 |
| 13 namespace autofill { |
| 14 namespace data_util { |
| 15 |
| 16 namespace { |
// Lowercase tokens that StripPrefixes() discards when they appear at the
// front of a name. Entries carry no trailing period; ContainsString() trims
// periods from the candidate token before comparing.
const char* const name_prefixes[] = {
    "1lt",     "1st",  "2lt",     "2nd",    "3rd",
    "admiral", "capt", "captain", "col",    "cpt",
    "dr",      "gen",  "general", "lcdr",   "lt",
    "ltc",     "ltg",  "ltjg",    "maj",    "major",
    "mg",      "mr",   "mrs",     "ms",     "pastor",
    "prof",    "rep",  "reverend", "rev",   "sen",
    "st",
};
| 23 |
// Lowercase tokens that StripSuffixes() discards when they appear at the end
// of a name. Interior periods are part of the entry ("ph.d"); periods at the
// edges of the candidate token are trimmed by ContainsString() before
// comparing.
const char* const name_suffixes[] = {
    "b.a", "ba",   "d.d.s", "dds",
    "i",   "ii",   "iii",   "iv",
    "ix",  "jr",   "m.a",   "m.d",
    "ma",  "md",   "ms",    "ph.d",
    "phd", "sr",   "v",     "vi",
    "vii", "viii", "x",
};
| 28 |
// Lowercase tokens that SplitName() keeps attached to the family name when
// they directly precede it (e.g. the "van" in "van Gogh").
const char* const family_name_prefixes[] = {
    "d'",  "de",  "del", "der",
    "di",  "la",  "le",  "mc",
    "san", "st",  "ter", "van",
    "von",
};
| 32 |
| 33 // Returns true if |set| contains |element|, modulo a final period. |
| 34 bool ContainsString(const char* const set[], |
| 35 size_t set_size, |
| 36 const base::string16& element) { |
| 37 if (!base::IsStringASCII(element)) |
| 38 return false; |
| 39 |
| 40 base::string16 trimmed_element; |
| 41 base::TrimString(element, base::ASCIIToUTF16("."), &trimmed_element); |
| 42 |
| 43 for (size_t i = 0; i < set_size; ++i) { |
| 44 if (base::LowerCaseEqualsASCII(trimmed_element, set[i])) |
| 45 return true; |
| 46 } |
| 47 |
| 48 return false; |
| 49 } |
| 50 |
| 51 // Removes common name prefixes from |name_tokens|. |
| 52 void StripPrefixes(std::vector<base::string16>* name_tokens) { |
| 53 std::vector<base::string16>::iterator iter = name_tokens->begin(); |
| 54 while (iter != name_tokens->end()) { |
| 55 if (!ContainsString(name_prefixes, arraysize(name_prefixes), *iter)) |
| 56 break; |
| 57 ++iter; |
| 58 } |
| 59 |
| 60 std::vector<base::string16> copy_vector; |
| 61 copy_vector.assign(iter, name_tokens->end()); |
| 62 *name_tokens = copy_vector; |
| 63 } |
| 64 |
| 65 // Removes common name suffixes from |name_tokens|. |
| 66 void StripSuffixes(std::vector<base::string16>* name_tokens) { |
| 67 while (!name_tokens->empty()) { |
| 68 if (!ContainsString(name_suffixes, arraysize(name_suffixes), |
| 69 name_tokens->back())) { |
| 70 break; |
| 71 } |
| 72 name_tokens->pop_back(); |
| 73 } |
| 74 } |
| 75 |
| 76 } // namespace |
| 77 |
| 78 NameParts SplitName(const base::string16& name) { |
| 79 std::vector<base::string16> name_tokens = |
| 80 base::SplitString(name, base::ASCIIToUTF16(" ,"), base::KEEP_WHITESPACE, |
| 81 base::SPLIT_WANT_NONEMPTY); |
| 82 StripPrefixes(&name_tokens); |
| 83 |
| 84 // Don't assume "Ma" is a suffix in John Ma. |
| 85 if (name_tokens.size() > 2) |
| 86 StripSuffixes(&name_tokens); |
| 87 |
| 88 NameParts parts; |
| 89 |
| 90 if (name_tokens.empty()) { |
| 91 // Bad things have happened; just assume the whole thing is a given name. |
| 92 parts.given = name; |
| 93 return parts; |
| 94 } |
| 95 |
| 96 // Only one token, assume given name. |
| 97 if (name_tokens.size() == 1) { |
| 98 parts.given = name_tokens[0]; |
| 99 return parts; |
| 100 } |
| 101 |
| 102 // 2 or more tokens. Grab the family, which is the last word plus any |
| 103 // recognizable family prefixes. |
| 104 std::vector<base::string16> reverse_family_tokens; |
| 105 reverse_family_tokens.push_back(name_tokens.back()); |
| 106 name_tokens.pop_back(); |
| 107 while (name_tokens.size() >= 1 && |
| 108 ContainsString(family_name_prefixes, arraysize(family_name_prefixes), |
| 109 name_tokens.back())) { |
| 110 reverse_family_tokens.push_back(name_tokens.back()); |
| 111 name_tokens.pop_back(); |
| 112 } |
| 113 |
| 114 std::vector<base::string16> family_tokens(reverse_family_tokens.rbegin(), |
| 115 reverse_family_tokens.rend()); |
| 116 parts.family = base::JoinString(family_tokens, base::ASCIIToUTF16(" ")); |
| 117 |
| 118 // Take the last remaining token as the middle name (if there are at least 2 |
| 119 // tokens). |
| 120 if (name_tokens.size() >= 2) { |
| 121 parts.middle = name_tokens.back(); |
| 122 name_tokens.pop_back(); |
| 123 } |
| 124 |
| 125 // Remainder is given name. |
| 126 parts.given = base::JoinString(name_tokens, base::ASCIIToUTF16(" ")); |
| 127 |
| 128 return parts; |
| 129 } |
| 130 |
| 131 } // namespace data_util |
| 132 } // namespace autofill |
OLD | NEW |