Chromium Code Reviews| Index: components/autofill/core/browser/autofill_profile_comparator.cc |
| diff --git a/components/autofill/core/browser/autofill_profile_comparator.cc b/components/autofill/core/browser/autofill_profile_comparator.cc |
| index f34b03cb7a9e4fa845fdec179bafbac42e95b506..411346d18adaa059ea936005df3e4d4dfb055ebb 100644 |
| --- a/components/autofill/core/browser/autofill_profile_comparator.cc |
| +++ b/components/autofill/core/browser/autofill_profile_comparator.cc |
| @@ -7,7 +7,9 @@ |
| #include <algorithm> |
| #include <vector> |
| +#include "base/i18n/case_conversion.h" |
| #include "base/i18n/char_iterator.h" |
| +#include "base/strings/string_piece.h" |
| #include "base/strings/string_split.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversion_utils.h" |
| @@ -20,6 +22,10 @@ namespace { |
| const base::char16 kSpace[] = {L' ', L'\0'}; |
| +bool ContainsNewline(base::StringPiece16 text) { |
| + return text.find('\n') != base::StringPiece16::npos; |
| +} |
| + |
| } // namespace |
| AutofillProfileComparator::AutofillProfileComparator( |
| @@ -113,6 +119,144 @@ bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1, |
| HaveMergeableAddresses(p1, p2); |
| } |
| +bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1, |
| + const AutofillProfile& p2, |
| + NameInfo* name_info) const { |
| + DCHECK(HaveMergeableNames(p1, p2)); |
| + |
| + const AutofillType kFullName(NAME_FULL); |
| + const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_); |
| + const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_); |
| + const base::string16& normalized_full_name_1 = |
| + NormalizeForComparison(full_name_1); |
| + const base::string16& normalized_full_name_2 = |
| + NormalizeForComparison(full_name_2); |
| + |
| + const base::string16* best_name = nullptr; |
| + if (normalized_full_name_1.empty()) { |
| + // p1 has no name, so use the name from p2. |
| + best_name = &full_name_2; |
| + } else if (normalized_full_name_2.empty()) { |
| + // p2 has no name, so use the name from p1. |
| + best_name = &full_name_1; |
| + } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) { |
| + // full_name_2 is a variant of full_name_1. |
| + best_name = &full_name_1; |
| + } else { |
| + // If the assertion that p1 and p2 have mergeable names is true, then |
| + // full_name_1 must be a name variant of full_name_2; |
| + best_name = &full_name_2; |
| + } |
| + |
| + name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_); |
| + return true; |
| +} |
| + |
| +bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1, |
| + const AutofillProfile& p2, |
| + Address* address) const { |
| + DCHECK(HaveMergeableAddresses(p1, p2)); |
| + |
| + // One of the countries is empty or they are the same modulo case, so we just |
| + // have to find the non-empty one, if any. |
| + const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| + const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); |
| + address->SetInfo( |
| + kCountryCode, |
| + base::i18n::ToUpper( |
| + country1.empty() ? country1 : p2.GetInfo(kCountryCode, app_locale_)), |
| + app_locale_); |
| + |
| + // One of the zip codes is empty, they are the same, or one is a substring |
| + // of the other. So, we have to find the longest one. |
| + const AutofillType kZipCode(ADDRESS_HOME_ZIP); |
| + const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_); |
| + const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_); |
| + address->SetInfo(kZipCode, (zip1.size() > zip2.size() ? zip1 : zip2), |
| + app_locale_); |
| + |
| + // One of the states is empty or one of the states has a subset of tokens from |
| + // the other. Pick the non-empty state that is shorter. This is usually the |
| + // abbreviated one. |
| + const AutofillType kState(ADDRESS_HOME_STATE); |
| + const base::string16& state1 = p1.GetInfo(kState, app_locale_); |
| + const base::string16& state2 = p2.GetInfo(kState, app_locale_); |
| + if (state1.empty()) { |
| + address->SetInfo(kState, state2, app_locale_); |
| + } else if (state2.empty()) { |
| + address->SetInfo(kState, state1, app_locale_); |
| + } else { |
| + address->SetInfo(kState, (state1.size() < state2.size() ? state1 : state2), |
| + app_locale_); |
| + } |
| + |
| + // One of the cities is empty or one of the cities has a subset of tokens from |
| + // the other. Pick the non-empty city that is shorter. This is usually the |
| + // abbreviated one. |
| + const AutofillType kCity(ADDRESS_HOME_STATE); |
| + const base::string16& city1 = p1.GetInfo(kCity, app_locale_); |
| + const base::string16& city2 = p2.GetInfo(kCity, app_locale_); |
| + if (city1.empty()) { |
| + address->SetInfo(kCity, city2, app_locale_); |
| + } else if (city2.empty()) { |
| + address->SetInfo(kCity, city1, app_locale_); |
| + } else { |
| + address->SetInfo(kCity, (city1.size() < city2.size() ? city1 : city2), |
| + app_locale_); |
| + } |
| + |
| + // One of the addresses is empty or one of the addresses has a subset of |
| + // tokens from the other. Pick the non-em that is shorter. This is usually the |
| + // abbreviated one. |
| + const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); |
| + const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); |
| + const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); |
| + // If one of the addresses is empty then use the other. |
| + if (address1.empty()) { |
| + address->SetInfo(kStreetAddress, address2, app_locale_); |
| + } else if (address2.empty()) { |
| + address->SetInfo(kStreetAddress, address1, app_locale_); |
| + } else { |
| + // Prefer the multi-line address if one is multi-line and the other isn't. |
| + bool address1_multiline = ContainsNewline(address1); |
| + bool address2_multiline = ContainsNewline(address2); |
| + if (address1_multiline && !address2_multiline) { |
| + address->SetInfo(kStreetAddress, address1, app_locale_); |
| + } else if (address2_multiline && !address1_multiline) { |
| + address->SetInfo(kStreetAddress, address2, app_locale_); |
| + } else { |
| + // Prefer the one with more tokens if they're both single-line or both |
| + // multi-line addresses. |
| + int result = CompareTokens(NormalizeForComparison(address1), |
| + NormalizeForComparison(address2)); |
| + switch (result) { |
| + case 0: |
|
Mathieu
2016/06/21 18:55:07
use enum?
Roger McFarlane (Chromium)
2016/06/23 18:27:38
Done.
|
| + // They have the same set of unique tokens. Let's pick the one that's |
| + // longer. |
| + address->SetInfo( |
| + kStreetAddress, |
| + (address1.size() > address2.size() ? address1 : address2), |
| + app_locale_); |
| + break; |
| + case 1: |
| + // address1 has more unique tokens than address2. |
| + address->SetInfo(kStreetAddress, address1, app_locale_); |
| + break; |
| + case 2: |
| + // address2 has more unique tokens than address1. |
| + address->SetInfo(kStreetAddress, address1, app_locale_); |
| + break; |
| + default: |
| + // The addresses aren't mergeable and we shouldn't be doing any of |
| + // this. |
| + NOTREACHED(); |
| + return false; |
| + } |
| + } |
| + } |
| + return true; |
| +} |
| + |
| // static |
| std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( |
| base::StringPiece16 s) { |
| @@ -122,15 +266,25 @@ std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( |
| } |
| // static |
| -bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1, |
| - base::StringPiece16 s2) { |
| +AutofillProfileComparator::CompareTokensResult |
| +AutofillProfileComparator::CompareTokens(base::StringPiece16 s1, |
| + base::StringPiece16 s2) { |
| + // Note: std::include() expects the items in each range to be in sorted order, |
| + // hence the use of std::set<> instead of std::unordered_set<>. |
| std::set<base::StringPiece16> t1 = UniqueTokens(s1); |
| std::set<base::StringPiece16> t2 = UniqueTokens(s2); |
| - // Note: std::include() expects the items in each range to be in sorted order, |
| - // hence the use of std::set<> instead of std::unordered_set<>. |
| - return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) || |
| - std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()); |
| + // Does s1 contains all of the tokens in s2? As a special case, return 0 if |
| + // the two sets are exactly the samel. |
| + if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end())) |
| + return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2; |
| + |
| + // Does s2 contain all of the tokens in s1? |
| + if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end())) |
| + return S2_CONTAINS_S1; |
| + |
| + // Neither string contains all of the tokens from the other. |
| + return DIFFERENT_TOKENS; |
| } |
| // static |
| @@ -152,7 +306,8 @@ std::set<base::string16> AutofillProfileComparator::GetNamePartVariants( |
| // appends this sub-name and one that appends the initial of this sub-name. |
| // Duplicates will be discarded when they're added to the variants set. |
| for (const base::string16& sub_name : sub_names) { |
| - if (sub_name.empty()) continue; |
| + if (sub_name.empty()) |
| + continue; |
| std::vector<base::string16> new_variants; |
| for (const base::string16& variant : variants) { |
| new_variants.push_back(base::CollapseWhitespace( |
| @@ -167,7 +322,8 @@ std::set<base::string16> AutofillProfileComparator::GetNamePartVariants( |
| // initials. |
| base::string16 initials; |
| for (const base::string16& sub_name : sub_names) { |
| - if (sub_name.empty()) continue; |
| + if (sub_name.empty()) |
| + continue; |
| initials.push_back(sub_name[0]); |
| } |
| variants.insert(initials); |
| @@ -253,7 +409,7 @@ bool AutofillProfileComparator::HaveMergeableCompanyNames( |
| const base::string16& company_name_2 = NormalizeForComparison( |
| p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); |
| return company_name_1.empty() || company_name_2.empty() || |
| - HaveSameTokens(company_name_1, company_name_2); |
| + CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS; |
| } |
| bool AutofillProfileComparator::HaveMergeablePhoneNumbers( |
| @@ -300,10 +456,9 @@ bool AutofillProfileComparator::HaveMergeableAddresses( |
| const AutofillProfile& p2) const { |
| // If the address are not in the same country, then they're not the same. If |
| // one of the address countries is unknown/invalid the comparison continues. |
| - const base::string16& country1 = p1.GetInfo( |
| - AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_); |
| - const base::string16& country2 = p2.GetInfo( |
| - AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_); |
| + const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| + const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); |
| + const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); |
| if (!country1.empty() && !country2.empty() && |
| !case_insensitive_compare_.StringsEqual(country1, country2)) { |
| return false; |
| @@ -317,12 +472,11 @@ bool AutofillProfileComparator::HaveMergeableAddresses( |
| // ---- |
| // If the addresses are definitely not in the same zip/area code then we're |
| // done. Otherwise,the comparison continues. |
| + const AutofillType kZipCode(ADDRESS_HOME_ZIP); |
| const base::string16& zip1 = NormalizeForComparison( |
| - p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), |
| - DISCARD_WHITESPACE); |
| + p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| const base::string16& zip2 = NormalizeForComparison( |
| - p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), |
| - DISCARD_WHITESPACE); |
| + p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| if (!zip1.empty() && !zip2.empty() && |
| zip1.find(zip2) == base::string16::npos && |
| zip2.find(zip1) == base::string16::npos) { |
| @@ -331,41 +485,47 @@ bool AutofillProfileComparator::HaveMergeableAddresses( |
| // State |
| // ------ |
| - // Heuristic: If the match is between non-empty zip codes then we can infer |
| + // Heuristic: States are mergeable if one is a (possibly empty) bag of words |
| + // subset of the other. |
| + // |
| + // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| // that the two state strings are intended to have the same meaning. This |
| // handles the cases where we have invalid or poorly formed data in one of the |
| - // state values (like "Select one", or "CA - California"). Otherwise, we |
| - // actually have to check if the states map to the the same set of tokens. |
| - const base::string16& state1 = NormalizeForComparison( |
| - p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); |
| - const base::string16& state2 = NormalizeForComparison( |
| - p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); |
| - if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(state1, state2)) { |
| + // state values (like "Select one", or "CA - California"). |
| + const AutofillType kState(ADDRESS_HOME_STATE); |
| + const base::string16& state1 = |
| + NormalizeForComparison(p1.GetInfo(kState, app_locale_)); |
| + const base::string16& state2 = |
| + NormalizeForComparison(p2.GetInfo(kState, app_locale_)); |
| + if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { |
| return false; |
| } |
| // City |
| // ------ |
| - // Heuristic: If the match is between non-empty zip codes then we can infer |
| + // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words |
| + // subset of the other. |
| + // |
| + // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| // that the two city strings are intended to have the same meaning. This |
| - // handles the cases where we have a city vs one of its suburbs. Otherwise, we |
| - // actually have to check if the cities map to the the same set of tokens. |
| + // handles the cases where we have a city vs one of its suburbs. |
| const base::string16& city1 = NormalizeForComparison( |
| p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); |
| const base::string16& city2 = NormalizeForComparison( |
| p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); |
| - if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(city1, city2)) { |
| + if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { |
| return false; |
| } |
| // Address |
| // -------- |
| - // Heuristic: Use bag of words comparison on the post-normalized addresses. |
| + // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag |
| + // of words subset of the other. |
| const base::string16& address1 = NormalizeForComparison( |
| p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); |
| const base::string16& address2 = NormalizeForComparison( |
| p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); |
| - if (!HaveSameTokens(address1, address2)) { |
| + if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { |
| return false; |
| } |