Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Unified Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2088443002: Expand autofill profile merge logic. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/autofill/core/browser/autofill_profile_comparator.cc
diff --git a/components/autofill/core/browser/autofill_profile_comparator.cc b/components/autofill/core/browser/autofill_profile_comparator.cc
index f34b03cb7a9e4fa845fdec179bafbac42e95b506..411346d18adaa059ea936005df3e4d4dfb055ebb 100644
--- a/components/autofill/core/browser/autofill_profile_comparator.cc
+++ b/components/autofill/core/browser/autofill_profile_comparator.cc
@@ -7,7 +7,9 @@
#include <algorithm>
#include <vector>
+#include "base/i18n/case_conversion.h"
#include "base/i18n/char_iterator.h"
+#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
@@ -20,6 +22,10 @@ namespace {
const base::char16 kSpace[] = {L' ', L'\0'};
+bool ContainsNewline(base::StringPiece16 text) {
+ return text.find('\n') != base::StringPiece16::npos;
+}
+
} // namespace
AutofillProfileComparator::AutofillProfileComparator(
@@ -113,6 +119,144 @@ bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,
HaveMergeableAddresses(p1, p2);
}
+bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1,
+ const AutofillProfile& p2,
+ NameInfo* name_info) const {
+ DCHECK(HaveMergeableNames(p1, p2));
+
+ const AutofillType kFullName(NAME_FULL);
+ const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_);
+ const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_);
+ const base::string16& normalized_full_name_1 =
+ NormalizeForComparison(full_name_1);
+ const base::string16& normalized_full_name_2 =
+ NormalizeForComparison(full_name_2);
+
+ const base::string16* best_name = nullptr;
+ if (normalized_full_name_1.empty()) {
+ // p1 has no name, so use the name from p2.
+ best_name = &full_name_2;
+ } else if (normalized_full_name_2.empty()) {
+ // p2 has no name, so use the name from p1.
+ best_name = &full_name_1;
+ } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) {
+ // full_name_2 is a variant of full_name_1.
+ best_name = &full_name_1;
+ } else {
+ // If the assertion that p1 and p2 have mergeable names is true, then
+ // full_name_1 must be a name variant of full_name_2;
+ best_name = &full_name_2;
+ }
+
+ name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_);
+ return true;
+}
+
+bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1,
+ const AutofillProfile& p2,
+ Address* address) const {
+ DCHECK(HaveMergeableAddresses(p1, p2));
+
+ // One of the countries is empty or they are the same modulo case, so we just
+ // have to find the non-empty one, if any.
+ const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);
+ const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);
+ address->SetInfo(
+ kCountryCode,
+ base::i18n::ToUpper(
+ country1.empty() ? country1 : p2.GetInfo(kCountryCode, app_locale_)),
+ app_locale_);
+
+ // One of the zip codes is empty, they are the same, or one is a substring
+ // of the other. So, we have to find the longest one.
+ const AutofillType kZipCode(ADDRESS_HOME_ZIP);
+ const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_);
+ const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_);
+ address->SetInfo(kZipCode, (zip1.size() > zip2.size() ? zip1 : zip2),
+ app_locale_);
+
+ // One of the states is empty or one of the states has a subset of tokens from
+ // the other. Pick the non-empty state that is shorter. This is usually the
+ // abbreviated one.
+ const AutofillType kState(ADDRESS_HOME_STATE);
+ const base::string16& state1 = p1.GetInfo(kState, app_locale_);
+ const base::string16& state2 = p2.GetInfo(kState, app_locale_);
+ if (state1.empty()) {
+ address->SetInfo(kState, state2, app_locale_);
+ } else if (state2.empty()) {
+ address->SetInfo(kState, state1, app_locale_);
+ } else {
+ address->SetInfo(kState, (state1.size() < state2.size() ? state1 : state2),
+ app_locale_);
+ }
+
+ // One of the cities is empty or one of the cities has a subset of tokens from
+ // the other. Pick the non-empty city that is shorter. This is usually the
+ // abbreviated one.
+ const AutofillType kCity(ADDRESS_HOME_STATE);
+ const base::string16& city1 = p1.GetInfo(kCity, app_locale_);
+ const base::string16& city2 = p2.GetInfo(kCity, app_locale_);
+ if (city1.empty()) {
+ address->SetInfo(kCity, city2, app_locale_);
+ } else if (city2.empty()) {
+ address->SetInfo(kCity, city1, app_locale_);
+ } else {
+ address->SetInfo(kCity, (city1.size() < city2.size() ? city1 : city2),
+ app_locale_);
+ }
+
+ // One of the addresses is empty or one of the addresses has a subset of
+ // tokens from the other. Pick the non-em that is shorter. This is usually the
+ // abbreviated one.
+ const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS);
+ const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_);
+ const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_);
+ // If one of the addresses is empty then use the other.
+ if (address1.empty()) {
+ address->SetInfo(kStreetAddress, address2, app_locale_);
+ } else if (address2.empty()) {
+ address->SetInfo(kStreetAddress, address1, app_locale_);
+ } else {
+ // Prefer the multi-line address if one is multi-line and the other isn't.
+ bool address1_multiline = ContainsNewline(address1);
+ bool address2_multiline = ContainsNewline(address2);
+ if (address1_multiline && !address2_multiline) {
+ address->SetInfo(kStreetAddress, address1, app_locale_);
+ } else if (address2_multiline && !address1_multiline) {
+ address->SetInfo(kStreetAddress, address2, app_locale_);
+ } else {
+ // Prefer the one with more tokens if they're both single-line or both
+ // multi-line addresses.
+ int result = CompareTokens(NormalizeForComparison(address1),
+ NormalizeForComparison(address2));
+ switch (result) {
+ case 0:
Mathieu 2016/06/21 18:55:07 use enum?
Roger McFarlane (Chromium) 2016/06/23 18:27:38 Done.
+ // They have the same set of unique tokens. Let's pick the one that's
+ // longer.
+ address->SetInfo(
+ kStreetAddress,
+ (address1.size() > address2.size() ? address1 : address2),
+ app_locale_);
+ break;
+ case 1:
+ // address1 has more unique tokens than address2.
+ address->SetInfo(kStreetAddress, address1, app_locale_);
+ break;
+ case 2:
+ // address2 has more unique tokens than address1.
+ address->SetInfo(kStreetAddress, address1, app_locale_);
+ break;
+ default:
+ // The addresses aren't mergeable and we shouldn't be doing any of
+ // this.
+ NOTREACHED();
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
// static
std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(
base::StringPiece16 s) {
@@ -122,15 +266,25 @@ std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(
}
// static
-bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1,
- base::StringPiece16 s2) {
+AutofillProfileComparator::CompareTokensResult
+AutofillProfileComparator::CompareTokens(base::StringPiece16 s1,
+ base::StringPiece16 s2) {
+ // Note: std::include() expects the items in each range to be in sorted order,
+ // hence the use of std::set<> instead of std::unordered_set<>.
std::set<base::StringPiece16> t1 = UniqueTokens(s1);
std::set<base::StringPiece16> t2 = UniqueTokens(s2);
- // Note: std::include() expects the items in each range to be in sorted order,
- // hence the use of std::set<> instead of std::unordered_set<>.
- return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) ||
- std::includes(t2.begin(), t2.end(), t1.begin(), t1.end());
+ // Does s1 contains all of the tokens in s2? As a special case, return 0 if
+ // the two sets are exactly the samel.
+ if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()))
+ return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2;
+
+ // Does s2 contain all of the tokens in s1?
+ if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()))
+ return S2_CONTAINS_S1;
+
+ // Neither string contains all of the tokens from the other.
+ return DIFFERENT_TOKENS;
}
// static
@@ -152,7 +306,8 @@ std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(
// appends this sub-name and one that appends the initial of this sub-name.
// Duplicates will be discarded when they're added to the variants set.
for (const base::string16& sub_name : sub_names) {
- if (sub_name.empty()) continue;
+ if (sub_name.empty())
+ continue;
std::vector<base::string16> new_variants;
for (const base::string16& variant : variants) {
new_variants.push_back(base::CollapseWhitespace(
@@ -167,7 +322,8 @@ std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(
// initials.
base::string16 initials;
for (const base::string16& sub_name : sub_names) {
- if (sub_name.empty()) continue;
+ if (sub_name.empty())
+ continue;
initials.push_back(sub_name[0]);
}
variants.insert(initials);
@@ -253,7 +409,7 @@ bool AutofillProfileComparator::HaveMergeableCompanyNames(
const base::string16& company_name_2 = NormalizeForComparison(
p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));
return company_name_1.empty() || company_name_2.empty() ||
- HaveSameTokens(company_name_1, company_name_2);
+ CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS;
}
bool AutofillProfileComparator::HaveMergeablePhoneNumbers(
@@ -300,10 +456,9 @@ bool AutofillProfileComparator::HaveMergeableAddresses(
const AutofillProfile& p2) const {
// If the address are not in the same country, then they're not the same. If
// one of the address countries is unknown/invalid the comparison continues.
- const base::string16& country1 = p1.GetInfo(
- AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);
- const base::string16& country2 = p2.GetInfo(
- AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);
+ const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);
+ const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);
+ const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_);
if (!country1.empty() && !country2.empty() &&
!case_insensitive_compare_.StringsEqual(country1, country2)) {
return false;
@@ -317,12 +472,11 @@ bool AutofillProfileComparator::HaveMergeableAddresses(
// ----
// If the addresses are definitely not in the same zip/area code then we're
// done. Otherwise,the comparison continues.
+ const AutofillType kZipCode(ADDRESS_HOME_ZIP);
const base::string16& zip1 = NormalizeForComparison(
- p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),
- DISCARD_WHITESPACE);
+ p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
const base::string16& zip2 = NormalizeForComparison(
- p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),
- DISCARD_WHITESPACE);
+ p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
if (!zip1.empty() && !zip2.empty() &&
zip1.find(zip2) == base::string16::npos &&
zip2.find(zip1) == base::string16::npos) {
@@ -331,41 +485,47 @@ bool AutofillProfileComparator::HaveMergeableAddresses(
// State
// ------
- // Heuristic: If the match is between non-empty zip codes then we can infer
+ // Heuristic: States are mergeable if one is a (possibly empty) bag of words
+ // subset of the other.
+ //
+ // TODO(rogerm): If the match is between non-empty zip codes then we can infer
// that the two state strings are intended to have the same meaning. This
// handles the cases where we have invalid or poorly formed data in one of the
- // state values (like "Select one", or "CA - California"). Otherwise, we
- // actually have to check if the states map to the the same set of tokens.
- const base::string16& state1 = NormalizeForComparison(
- p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));
- const base::string16& state2 = NormalizeForComparison(
- p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));
- if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(state1, state2)) {
+ // state values (like "Select one", or "CA - California").
+ const AutofillType kState(ADDRESS_HOME_STATE);
+ const base::string16& state1 =
+ NormalizeForComparison(p1.GetInfo(kState, app_locale_));
+ const base::string16& state2 =
+ NormalizeForComparison(p2.GetInfo(kState, app_locale_));
+ if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) {
return false;
}
// City
// ------
- // Heuristic: If the match is between non-empty zip codes then we can infer
+ // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words
+ // subset of the other.
+ //
+ // TODO(rogerm): If the match is between non-empty zip codes then we can infer
// that the two city strings are intended to have the same meaning. This
- // handles the cases where we have a city vs one of its suburbs. Otherwise, we
- // actually have to check if the cities map to the the same set of tokens.
+ // handles the cases where we have a city vs one of its suburbs.
const base::string16& city1 = NormalizeForComparison(
p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));
const base::string16& city2 = NormalizeForComparison(
p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));
- if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(city1, city2)) {
+ if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {
return false;
}
// Address
// --------
- // Heuristic: Use bag of words comparison on the post-normalized addresses.
+ // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag
+ // of words subset of the other.
const base::string16& address1 = NormalizeForComparison(
p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));
const base::string16& address2 = NormalizeForComparison(
p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));
- if (!HaveSameTokens(address1, address2)) {
+ if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {
return false;
}

Powered by Google App Engine
This is Rietveld 408576698