Chromium Code Reviews| Index: components/autofill/core/browser/contact_info.cc |
| diff --git a/components/autofill/core/browser/contact_info.cc b/components/autofill/core/browser/contact_info.cc |
| index 0793bb5ff310c507941a7edd69bca4dfe57345b3..6f16c47e0a025190363e917c7a34930a3847879b 100644 |
| --- a/components/autofill/core/browser/contact_info.cc |
| +++ b/components/autofill/core/browser/contact_info.cc |
| @@ -16,6 +16,130 @@ |
| namespace autofill { |
| +namespace { |
| + |
| +static const char* const name_prefixes[] = { |
|
Ilya Sherman
2014/06/03 22:53:35
nit: No need for "static"; you're already in an an
Evan Stade
2014/06/03 23:55:35
Done.
|
| + "1lt", "1st", "2lt", "2nd", "3rd", "admiral", "capt", "captain", "col", |
| + "cpt", "dr", "gen", "general", "lcdr", "lt", "ltc", "ltg", "ltjg", "maj", |
| + "major", "mg", "mr", "mrs", "ms", "pastor", "prof", "rep", "reverend", |
| + "rev", "sen", "st" }; |
| + |
| +static const char* const name_suffixes[] = { |
| + "b.a", "ba", "d.d.s", "dds", "i", "ii", "iii", "iv", "ix", "jr", "m.a", |
| + "m.d", "ma", "md", "ms", "ph.d", "phd", "sr", "v", "vi", "vii", "viii", |
| + "x" }; |
| + |
| +static const char* const family_name_prefixes[] = { |
| + "d\'", "de", "del", "der", "di", "la", "le", "mc", "san", "st", "ter", |
|
Ilya Sherman
2014/06/03 22:53:35
Out of curiousity, why does the apostrophe need to
Evan Stade
2014/06/03 23:55:35
I just copy-pasta'd that. Turns out it doesn't nee
|
| + "van", "von" }; |
| + |
| +// Returns true if |set| contains |element|, modulo a final period. |
| +bool ContainsString(const char* const set[], |
| + size_t set_size, |
| + const base::string16& element) { |
| + if (!base::IsStringASCII(element)) |
| + return false; |
| + |
| + base::string16 trimmed_element; |
| + base::TrimString(element, base::ASCIIToUTF16("."), &trimmed_element); |
|
Ilya Sherman
2014/06/03 22:53:35
Why not just remove all dots, rather than removing
Evan Stade
2014/06/03 23:55:35
I copied this behavior from the original code (alt
Ilya Sherman
2014/06/04 00:13:45
In that case, how about not stripping trailing dot
Evan Stade
2014/06/04 00:24:23
But then "John Smith Jr." wouldn't match against "
Ilya Sherman
2014/06/04 00:34:32
*sigh*, alright, I cave :P
|
| + |
| + for (size_t i = 0; i < set_size; ++i) { |
| + if (LowerCaseEqualsASCII(trimmed_element, set[i])) |
| + return true; |
| + } |
| + |
| + return false; |
| +} |
| + |
| +// Removes common name prefixes from |name_tokens|. |
| +void StripPrefixes(std::vector<base::string16>* name_tokens) { |
| + std::vector<base::string16>::iterator iter = name_tokens->begin(); |
| + while(iter != name_tokens->end()) { |
| + if (!ContainsString(name_prefixes, arraysize(name_prefixes), *iter)) |
| + break; |
| + ++iter; |
| + } |
| + |
| + name_tokens->assign(iter, name_tokens->end()); |
| +} |
| + |
| +// Removes common name suffixes from |name_tokens|. |
| +void StripSuffixes(std::vector<base::string16>* name_tokens) { |
| + std::vector<base::string16>::iterator iter = name_tokens->end(); |
| + while(iter != name_tokens->begin()) { |
| + if (!ContainsString(name_suffixes, arraysize(name_suffixes), *--iter)) |
| + break; |
| + } |
| + |
| + name_tokens->assign(name_tokens->begin(), ++iter); |
| +} |
|
Ilya Sherman
2014/06/03 22:53:35
I think this can be written more simply using std:
Evan Stade
2014/06/03 23:55:35
but this breaks out of the loop the first time it
Ilya Sherman
2014/06/04 00:13:45
Hrm, you're right. Nevermind.
|
| + |
| +struct NameParts { |
| + base::string16 given; |
| + base::string16 middle; |
| + base::string16 family; |
|
Ilya Sherman
2014/06/03 22:53:35
nit: I think we should be consistent about first/m
Evan Stade
2014/06/03 23:55:35
I'd rather tackle that when we get around to imple
Ilya Sherman
2014/06/04 00:13:45
I'd really prefer to keep the file internally cons
Evan Stade
2014/06/04 00:24:23
Done.
|
| +}; |
| + |
| +// TODO(estade): this does Western name splitting. It should do different |
|
Ilya Sherman
2014/06/03 22:53:35
nit: "this" -> "This"
Evan Stade
2014/06/03 23:55:35
done, although the precedent that exists through C
|
| +// splitting based on the app locale. |
| +NameParts SplitName(const base::string16& name) { |
|
Ilya Sherman
2014/06/03 22:53:35
Optional nit: Perhaps "ParseName()" rather than "S
Evan Stade
2014/06/03 23:55:35
name inspired by com.android.providers.contacts.Na
|
| + std::vector<base::string16> name_tokens; |
| + Tokenize(name, base::ASCIIToUTF16(" ,"), &name_tokens); |
| + |
| + StripPrefixes(&name_tokens); |
| + |
| + if (name_tokens.size() > 2) |
|
Ilya Sherman
2014/06/03 22:53:34
Why > 2? Can suffixes ever be valid names?
Evan Stade
2014/06/03 23:55:35
Yes. John Ma.
Ilya Sherman
2014/06/04 00:13:45
Ok... in that case, is "Ma" more likely to be a su
Evan Stade
2014/06/04 00:24:23
Well, if John has a middle name he's SOL. But this
Ilya Sherman
2014/06/04 00:34:32
Well, sure, I suppose heuristics are always going
Evan Stade
2014/06/04 01:26:29
Done.
|
| + StripSuffixes(&name_tokens); |
| + |
| + NameParts parts; |
| + |
| + // Bad things have happened; just assume the whole thing is a given name. |
|
Ilya Sherman
2014/06/03 22:53:34
nit: Please move this comment into the if-stmt. U
Evan Stade
2014/06/03 23:55:35
Done.
|
| + if (name_tokens.empty()) { |
| + parts.given = name; |
| + return parts; |
| + } |
| + |
| + // Only one token, assume given name. |
| + if (name_tokens.size() == 1) { |
| + parts.given = name_tokens[0]; |
| + return parts; |
| + } |
| + |
| + // 2 or more tokens. Grab the family, which is the last word plus any |
| + // recognizable family prefixes. |
| + std::vector<base::string16> reverse_family_tokens; |
| + reverse_family_tokens.push_back(name_tokens.back()); |
| + name_tokens.pop_back(); |
| + while (name_tokens.size() >= 1) { |
| + if (!ContainsString(family_name_prefixes, |
| + arraysize(family_name_prefixes), |
| + name_tokens.back())) { |
|
Ilya Sherman
2014/06/03 22:53:34
nit: Please move this into the loop condition.
Evan Stade
2014/06/03 23:55:35
Done.
|
| + break; |
| + } |
| + |
| + reverse_family_tokens.push_back(name_tokens.back()); |
| + name_tokens.pop_back(); |
| + } |
| + |
| + std::vector<base::string16> family_tokens(reverse_family_tokens.rbegin(), |
| + reverse_family_tokens.rend()); |
|
Ilya Sherman
2014/06/03 22:53:34
nit: alignment
|
| + parts.family = JoinString(family_tokens, base::char16(' ')); |
| + |
| + // Take the last remaining token as the middle name (if there are at least 2 |
| + // tokens). |
| + if (name_tokens.size() > 1) { |
|
Ilya Sherman
2014/06/03 22:53:35
Optional nit: I think ">= 2" conveys the intent sl
Evan Stade
2014/06/03 23:55:35
sure, why not
|
| + parts.middle = name_tokens.back(); |
| + name_tokens.pop_back(); |
| + } |
| + |
| + // Remainder is given name. |
| + parts.given = JoinString(name_tokens, base::char16(' ')); |
|
Ilya Sherman
2014/06/03 22:53:34
Are multi-word given names really more common than
Evan Stade
2014/06/03 23:55:35
I don't know. Someone thought about it and decided
|
| + |
| + return parts; |
| +} |
| + |
| +} // namespace |
| + |
| NameInfo::NameInfo() {} |
| NameInfo::NameInfo(const NameInfo& info) : FormGroup() { |
| @@ -31,6 +155,7 @@ NameInfo& NameInfo::operator=(const NameInfo& info) { |
| first_ = info.first_; |
| middle_ = info.middle_; |
| last_ = info.last_; |
| + full_name_ = info.full_name_; |
| return *this; |
| } |
| @@ -46,13 +171,13 @@ base::string16 NameInfo::GetRawInfo(ServerFieldType type) const { |
| DCHECK_EQ(NAME, AutofillType(type).group()); |
| switch (type) { |
| case NAME_FIRST: |
| - return first(); |
| + return first_; |
| case NAME_MIDDLE: |
| - return middle(); |
| + return middle_; |
| case NAME_LAST: |
| - return last(); |
| + return last_; |
| case NAME_MIDDLE_INITIAL: |
| return MiddleInitial(); |
| @@ -91,6 +216,9 @@ void NameInfo::SetRawInfo(ServerFieldType type, const base::string16& value) { |
| } |
| base::string16 NameInfo::FullName() const { |
| + if (!full_name_.empty()) |
| + return full_name_; |
| + |
| std::vector<base::string16> full_name; |
| if (!first_.empty()) |
| full_name.push_back(first_); |
| @@ -108,34 +236,18 @@ base::string16 NameInfo::MiddleInitial() const { |
| if (middle_.empty()) |
| return base::string16(); |
| - base::string16 middle_name(middle()); |
| + base::string16 middle_name(middle_); |
| base::string16 initial; |
| initial.push_back(middle_name[0]); |
| return initial; |
| } |
| void NameInfo::SetFullName(const base::string16& full) { |
| - // Clear the names. |
| - first_ = base::string16(); |
| - middle_ = base::string16(); |
| - last_ = base::string16(); |
| - |
| - std::vector<base::string16> full_name_tokens; |
| - Tokenize(full, base::ASCIIToUTF16(" "), &full_name_tokens); |
| - |
| - // There are four possibilities: empty; first name; first and last names; |
| - // first, middle (possibly multiple strings) and then the last name. |
| - if (full_name_tokens.size() > 0) { |
| - first_ = full_name_tokens[0]; |
| - if (full_name_tokens.size() > 1) { |
| - last_ = full_name_tokens.back(); |
| - if (full_name_tokens.size() > 2) { |
| - full_name_tokens.erase(full_name_tokens.begin()); |
| - full_name_tokens.pop_back(); |
| - middle_ = JoinString(full_name_tokens, ' '); |
| - } |
| - } |
| - } |
| + full_name_ = full; |
| + NameParts parts = SplitName(full); |
| + first_ = parts.given; |
| + middle_ = parts.middle; |
| + last_ = parts.family; |
|
Ilya Sherman
2014/06/03 22:53:35
Optional nit: It might be cleaner to just pass in
Evan Stade
2014/06/03 23:55:35
That's true, but I guess I have an intrinsic avers
|
| } |
| EmailInfo::EmailInfo() {} |