components/autofill/core/browser/autofill_profile_comparator.cc - Issue 2088443002: Expand autofill profile merge logic.

Unified Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2088443002: Expand autofill profile merge logic. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « components/autofill/core/browser/autofill_profile_comparator.h ('k') | components/autofill/core/browser/autofill_profile_comparator_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/autofill/core/browser/autofill_profile_comparator.cc

diff --git a/components/autofill/core/browser/autofill_profile_comparator.cc b/components/autofill/core/browser/autofill_profile_comparator.cc

index f34b03cb7a9e4fa845fdec179bafbac42e95b506..411346d18adaa059ea936005df3e4d4dfb055ebb 100644

--- a/components/autofill/core/browser/autofill_profile_comparator.cc

+++ b/components/autofill/core/browser/autofill_profile_comparator.cc

@@ -7,7 +7,9 @@

#include <algorithm>

#include <vector>

+#include "base/i18n/case_conversion.h"

#include "base/i18n/char_iterator.h"

+#include "base/strings/string_piece.h"

#include "base/strings/string_split.h"

#include "base/strings/string_util.h"

#include "base/strings/utf_string_conversion_utils.h"

@@ -20,6 +22,10 @@ namespace {

const base::char16 kSpace[] = {L' ', L'\0'};

+bool ContainsNewline(base::StringPiece16 text) {

+ return text.find('\n') != base::StringPiece16::npos;

} // namespace

AutofillProfileComparator::AutofillProfileComparator(

@@ -113,6 +119,144 @@ bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,

HaveMergeableAddresses(p1, p2);

}

+bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1,

+ const AutofillProfile& p2,

+ NameInfo* name_info) const {

+ DCHECK(HaveMergeableNames(p1, p2));

+ const AutofillType kFullName(NAME_FULL);

+ const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_);

+ const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_);

+ const base::string16& normalized_full_name_1 =

+ NormalizeForComparison(full_name_1);

+ const base::string16& normalized_full_name_2 =

+ NormalizeForComparison(full_name_2);

+ const base::string16* best_name = nullptr;

+ if (normalized_full_name_1.empty()) {

+ // p1 has no name, so use the name from p2.

+ best_name = &full_name_2;

+ } else if (normalized_full_name_2.empty()) {

+ // p2 has no name, so use the name from p1.

+ best_name = &full_name_1;

+ } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) {

+ // full_name_2 is a variant of full_name_1.

+ best_name = &full_name_1;

+ } else {

+ // If the assertion that p1 and p2 have mergeable names is true, then

+ // full_name_1 must be a name variant of full_name_2;

+ best_name = &full_name_2;

+ }

+ name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_);

+ return true;

+bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1,

+ const AutofillProfile& p2,

+ Address* address) const {

+ DCHECK(HaveMergeableAddresses(p1, p2));

+ // One of the countries is empty or they are the same modulo case, so we just

+ // have to find the non-empty one, if any.

+ const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

+ const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);

+ address->SetInfo(

+ kCountryCode,

+ base::i18n::ToUpper(

+ country1.empty() ? country1 : p2.GetInfo(kCountryCode, app_locale_)),

+ app_locale_);

+ // One of the zip codes is empty, they are the same, or one is a substring

+ // of the other. So, we have to find the longest one.

+ const AutofillType kZipCode(ADDRESS_HOME_ZIP);

+ const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_);

+ const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_);

+ address->SetInfo(kZipCode, (zip1.size() > zip2.size() ? zip1 : zip2),

+ app_locale_);

+ // One of the states is empty or one of the states has a subset of tokens from

+ // the other. Pick the non-empty state that is shorter. This is usually the

+ // abbreviated one.

+ const AutofillType kState(ADDRESS_HOME_STATE);

+ const base::string16& state1 = p1.GetInfo(kState, app_locale_);

+ const base::string16& state2 = p2.GetInfo(kState, app_locale_);

+ if (state1.empty()) {

+ address->SetInfo(kState, state2, app_locale_);

+ } else if (state2.empty()) {

+ address->SetInfo(kState, state1, app_locale_);

+ } else {

+ address->SetInfo(kState, (state1.size() < state2.size() ? state1 : state2),

+ app_locale_);

+ }

+ // One of the cities is empty or one of the cities has a subset of tokens from

+ // the other. Pick the non-empty city that is shorter. This is usually the

+ // abbreviated one.

+ const AutofillType kCity(ADDRESS_HOME_STATE);

+ const base::string16& city1 = p1.GetInfo(kCity, app_locale_);

+ const base::string16& city2 = p2.GetInfo(kCity, app_locale_);

+ if (city1.empty()) {

+ address->SetInfo(kCity, city2, app_locale_);

+ } else if (city2.empty()) {

+ address->SetInfo(kCity, city1, app_locale_);

+ } else {

+ address->SetInfo(kCity, (city1.size() < city2.size() ? city1 : city2),

+ app_locale_);

+ }

+ // One of the addresses is empty or one of the addresses has a subset of

+ // tokens from the other. Pick the non-em that is shorter. This is usually the

+ // abbreviated one.

+ const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS);

+ const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_);

+ const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_);

+ // If one of the addresses is empty then use the other.

+ if (address1.empty()) {

+ address->SetInfo(kStreetAddress, address2, app_locale_);

+ } else if (address2.empty()) {

+ address->SetInfo(kStreetAddress, address1, app_locale_);

+ } else {

+ // Prefer the multi-line address if one is multi-line and the other isn't.

+ bool address1_multiline = ContainsNewline(address1);

+ bool address2_multiline = ContainsNewline(address2);

+ if (address1_multiline && !address2_multiline) {

+ address->SetInfo(kStreetAddress, address1, app_locale_);

+ } else if (address2_multiline && !address1_multiline) {

+ address->SetInfo(kStreetAddress, address2, app_locale_);

+ } else {

+ // Prefer the one with more tokens if they're both single-line or both

+ // multi-line addresses.

+ int result = CompareTokens(NormalizeForComparison(address1),

+ NormalizeForComparison(address2));

+ switch (result) {

+ case 0:

Mathieu 2016/06/21 18:55:07 use enum?

Roger McFarlane (Chromium) 2016/06/23 18:27:38 Done.

+ // They have the same set of unique tokens. Let's pick the one that's

+ // longer.

+ address->SetInfo(

+ kStreetAddress,

+ (address1.size() > address2.size() ? address1 : address2),

+ app_locale_);

+ break;

+ case 1:

+ // address1 has more unique tokens than address2.

+ address->SetInfo(kStreetAddress, address1, app_locale_);

+ break;

+ case 2:

+ // address2 has more unique tokens than address1.

+ address->SetInfo(kStreetAddress, address1, app_locale_);

+ break;

+ default:

+ // The addresses aren't mergeable and we shouldn't be doing any of

+ // this.

+ NOTREACHED();

+ return false;

+ }

+ return true;

// static

std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(

base::StringPiece16 s) {

@@ -122,15 +266,25 @@ std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(

}

// static

-bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1,

- base::StringPiece16 s2) {

+AutofillProfileComparator::CompareTokensResult

+AutofillProfileComparator::CompareTokens(base::StringPiece16 s1,

+ base::StringPiece16 s2) {

+ // Note: std::include() expects the items in each range to be in sorted order,

+ // hence the use of std::set<> instead of std::unordered_set<>.

std::set<base::StringPiece16> t1 = UniqueTokens(s1);

std::set<base::StringPiece16> t2 = UniqueTokens(s2);

- // Note: std::include() expects the items in each range to be in sorted order,

- // hence the use of std::set<> instead of std::unordered_set<>.

- return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) ||

- std::includes(t2.begin(), t2.end(), t1.begin(), t1.end());

+ // Does s1 contains all of the tokens in s2? As a special case, return 0 if

+ // the two sets are exactly the samel.

+ if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()))

+ return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2;

+ // Does s2 contain all of the tokens in s1?

+ if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()))

+ return S2_CONTAINS_S1;

+ // Neither string contains all of the tokens from the other.

+ return DIFFERENT_TOKENS;

}

// static

@@ -152,7 +306,8 @@ std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(

// appends this sub-name and one that appends the initial of this sub-name.

// Duplicates will be discarded when they're added to the variants set.

for (const base::string16& sub_name : sub_names) {

- if (sub_name.empty()) continue;

+ if (sub_name.empty())

+ continue;

std::vector<base::string16> new_variants;

for (const base::string16& variant : variants) {

new_variants.push_back(base::CollapseWhitespace(

@@ -167,7 +322,8 @@ std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(

// initials.

base::string16 initials;

for (const base::string16& sub_name : sub_names) {

- if (sub_name.empty()) continue;

+ if (sub_name.empty())

+ continue;

initials.push_back(sub_name[0]);

}

variants.insert(initials);

@@ -253,7 +409,7 @@ bool AutofillProfileComparator::HaveMergeableCompanyNames(

const base::string16& company_name_2 = NormalizeForComparison(

p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));

return company_name_1.empty() || company_name_2.empty() ||

- HaveSameTokens(company_name_1, company_name_2);

+ CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS;

}

bool AutofillProfileComparator::HaveMergeablePhoneNumbers(

@@ -300,10 +456,9 @@ bool AutofillProfileComparator::HaveMergeableAddresses(

const AutofillProfile& p2) const {

// If the address are not in the same country, then they're not the same. If

// one of the address countries is unknown/invalid the comparison continues.

- const base::string16& country1 = p1.GetInfo(

- AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);

- const base::string16& country2 = p2.GetInfo(

- AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);

+ const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

+ const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);

+ const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_);

if (!country1.empty() && !country2.empty() &&

!case_insensitive_compare_.StringsEqual(country1, country2)) {

return false;

@@ -317,12 +472,11 @@ bool AutofillProfileComparator::HaveMergeableAddresses(

// ----

// If the addresses are definitely not in the same zip/area code then we're

// done. Otherwise,the comparison continues.

+ const AutofillType kZipCode(ADDRESS_HOME_ZIP);

const base::string16& zip1 = NormalizeForComparison(

- p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),

- DISCARD_WHITESPACE);

+ p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

const base::string16& zip2 = NormalizeForComparison(

- p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),

- DISCARD_WHITESPACE);

+ p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

if (!zip1.empty() && !zip2.empty() &&

zip1.find(zip2) == base::string16::npos &&

zip2.find(zip1) == base::string16::npos) {

@@ -331,41 +485,47 @@ bool AutofillProfileComparator::HaveMergeableAddresses(

// State

// ------

- // Heuristic: If the match is between non-empty zip codes then we can infer

+ // Heuristic: States are mergeable if one is a (possibly empty) bag of words

+ // subset of the other.

+ //

+ // TODO(rogerm): If the match is between non-empty zip codes then we can infer

// that the two state strings are intended to have the same meaning. This

// handles the cases where we have invalid or poorly formed data in one of the

- // state values (like "Select one", or "CA - California"). Otherwise, we

- // actually have to check if the states map to the the same set of tokens.

- const base::string16& state1 = NormalizeForComparison(

- p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));

- const base::string16& state2 = NormalizeForComparison(

- p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));

- if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(state1, state2)) {

+ // state values (like "Select one", or "CA - California").

+ const AutofillType kState(ADDRESS_HOME_STATE);

+ const base::string16& state1 =

+ NormalizeForComparison(p1.GetInfo(kState, app_locale_));

+ const base::string16& state2 =

+ NormalizeForComparison(p2.GetInfo(kState, app_locale_));

+ if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) {

return false;

}

// City

// ------

- // Heuristic: If the match is between non-empty zip codes then we can infer

+ // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words

+ // subset of the other.

+ //

+ // TODO(rogerm): If the match is between non-empty zip codes then we can infer

// that the two city strings are intended to have the same meaning. This

- // handles the cases where we have a city vs one of its suburbs. Otherwise, we

- // actually have to check if the cities map to the the same set of tokens.

+ // handles the cases where we have a city vs one of its suburbs.

const base::string16& city1 = NormalizeForComparison(

p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

const base::string16& city2 = NormalizeForComparison(

p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

- if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(city1, city2)) {

+ if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {

return false;

}

// Address

// --------

- // Heuristic: Use bag of words comparison on the post-normalized addresses.

+ // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag

+ // of words subset of the other.

const base::string16& address1 = NormalizeForComparison(

p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

const base::string16& address2 = NormalizeForComparison(

p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

- if (!HaveSameTokens(address1, address2)) {

+ if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {

return false;

}