Index: components/autofill/core/browser/autofill_data_util.cc |
diff --git a/components/autofill/core/browser/autofill_data_util.cc b/components/autofill/core/browser/autofill_data_util.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..63dc48213a01f9e4e7d1b95df2df7717d69696f7 |
--- /dev/null |
+++ b/components/autofill/core/browser/autofill_data_util.cc |
@@ -0,0 +1,132 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "components/autofill/core/browser/autofill_data_util.h" |
+ |
+#include <vector> |
+ |
+#include "base/strings/string_split.h" |
+#include "base/strings/string_util.h" |
+#include "base/strings/utf_string_conversions.h" |
+ |
+namespace autofill { |
+namespace data_util { |
+ |
+namespace { |
+const char* const name_prefixes[] = { |
+ "1lt", "1st", "2lt", "2nd", "3rd", "admiral", "capt", |
+ "captain", "col", "cpt", "dr", "gen", "general", "lcdr", |
+ "lt", "ltc", "ltg", "ltjg", "maj", "major", "mg", |
+ "mr", "mrs", "ms", "pastor", "prof", "rep", "reverend", |
+ "rev", "sen", "st"}; |
+ |
+const char* const name_suffixes[] = {"b.a", "ba", "d.d.s", "dds", "i", "ii", |
+ "iii", "iv", "ix", "jr", "m.a", "m.d", |
+ "ma", "md", "ms", "ph.d", "phd", "sr", |
+ "v", "vi", "vii", "viii", "x"}; |
+ |
+const char* const family_name_prefixes[] = {"d'", "de", "del", "der", "di", |
+ "la", "le", "mc", "san", "st", |
+ "ter", "van", "von"}; |
+ |
+// Returns true if |set| contains |element|, modulo a final period. |
+bool ContainsString(const char* const set[], |
+ size_t set_size, |
+ const base::string16& element) { |
+ if (!base::IsStringASCII(element)) |
+ return false; |
+ |
+ base::string16 trimmed_element; |
+ base::TrimString(element, base::ASCIIToUTF16("."), &trimmed_element); |
+ |
+ for (size_t i = 0; i < set_size; ++i) { |
+ if (base::LowerCaseEqualsASCII(trimmed_element, set[i])) |
+ return true; |
+ } |
+ |
+ return false; |
+} |
+ |
+// Removes common name prefixes from |name_tokens|. |
+void StripPrefixes(std::vector<base::string16>* name_tokens) { |
+ std::vector<base::string16>::iterator iter = name_tokens->begin(); |
+ while (iter != name_tokens->end()) { |
+ if (!ContainsString(name_prefixes, arraysize(name_prefixes), *iter)) |
+ break; |
+ ++iter; |
+ } |
+ |
+ std::vector<base::string16> copy_vector; |
+ copy_vector.assign(iter, name_tokens->end()); |
+ *name_tokens = copy_vector; |
+} |
+ |
+// Removes common name suffixes from |name_tokens|. |
+void StripSuffixes(std::vector<base::string16>* name_tokens) { |
+ while (!name_tokens->empty()) { |
+ if (!ContainsString(name_suffixes, arraysize(name_suffixes), |
+ name_tokens->back())) { |
+ break; |
+ } |
+ name_tokens->pop_back(); |
+ } |
+} |
+ |
+} // namespace |
+ |
+NameParts SplitName(const base::string16& name) { |
+ std::vector<base::string16> name_tokens = |
+ base::SplitString(name, base::ASCIIToUTF16(" ,"), base::KEEP_WHITESPACE, |
+ base::SPLIT_WANT_NONEMPTY); |
+ StripPrefixes(&name_tokens); |
+ |
+ // Don't assume "Ma" is a suffix in John Ma. |
+ if (name_tokens.size() > 2) |
+ StripSuffixes(&name_tokens); |
+ |
+ NameParts parts; |
+ |
+ if (name_tokens.empty()) { |
+ // Bad things have happened; just assume the whole thing is a given name. |
+ parts.given = name; |
+ return parts; |
+ } |
+ |
+ // Only one token, assume given name. |
+ if (name_tokens.size() == 1) { |
+ parts.given = name_tokens[0]; |
+ return parts; |
+ } |
+ |
+ // 2 or more tokens. Grab the family, which is the last word plus any |
+ // recognizable family prefixes. |
+ std::vector<base::string16> reverse_family_tokens; |
+ reverse_family_tokens.push_back(name_tokens.back()); |
+ name_tokens.pop_back(); |
+ while (name_tokens.size() >= 1 && |
+ ContainsString(family_name_prefixes, arraysize(family_name_prefixes), |
+ name_tokens.back())) { |
+ reverse_family_tokens.push_back(name_tokens.back()); |
+ name_tokens.pop_back(); |
+ } |
+ |
+ std::vector<base::string16> family_tokens(reverse_family_tokens.rbegin(), |
+ reverse_family_tokens.rend()); |
+ parts.family = base::JoinString(family_tokens, base::ASCIIToUTF16(" ")); |
+ |
+ // Take the last remaining token as the middle name (if there are at least 2 |
+ // tokens). |
+ if (name_tokens.size() >= 2) { |
+ parts.middle = name_tokens.back(); |
+ name_tokens.pop_back(); |
+ } |
+ |
+ // Remainder is given name. |
+ parts.given = base::JoinString(name_tokens, base::ASCIIToUTF16(" ")); |
+ |
+ return parts; |
+} |
+ |
+} // namespace data_util |
+} // namespace autofill |