components/autofill/core/browser/autofill_profile_comparator.cc - Issue 2088443002: Expand autofill profile merge logic.

Side by Side Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2088443002: Expand autofill profile merge logic. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: More name tests. Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« components/autofill/core/browser/autofill_profile_comparator.h ('K') | « components/autofill/core/browser/autofill_profile_comparator.h ('k') | components/autofill/core/browser/autofill_profile_comparator_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/autofill/core/browser/autofill_profile_comparator.h"	5 #include "components/autofill/core/browser/autofill_profile_comparator.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <vector>	8 #include <vector>

9	9

	10 #include "base/i18n/case_conversion.h"

10 #include "base/i18n/char_iterator.h"	11 #include "base/i18n/char_iterator.h"

	12 #include "base/strings/string_piece.h"

11 #include "base/strings/string_split.h"	13 #include "base/strings/string_split.h"

12 #include "base/strings/string_util.h"	14 #include "base/strings/string_util.h"

13 #include "base/strings/utf_string_conversion_utils.h"	15 #include "base/strings/utf_string_conversion_utils.h"

14 #include "base/strings/utf_string_conversions.h"	16 #include "base/strings/utf_string_conversions.h"

	17 #include "components/autofill/core/browser/autofill_country.h"

15 #include "components/autofill/core/browser/autofill_data_util.h"	18 #include "components/autofill/core/browser/autofill_data_util.h"

16 #include "third_party/libphonenumber/phonenumber_api.h"	19 #include "third_party/libphonenumber/phonenumber_api.h"

17	20

	21 using i18n::phonenumbers::PhoneNumberUtil;

	22 using base::UTF16ToUTF8;

	23 using base::UTF8ToUTF16;

	24

18 namespace autofill {	25 namespace autofill {

19 namespace {	26 namespace {

20	27

21 const base::char16 kSpace[] = {L' ', L'\0'};	28 const base::char16 kSpace[] = {L' ', L'\0'};

22	29

	30 bool ContainsNewline(base::StringPiece16 text) {

	31 return text.find('\n') != base::StringPiece16::npos;

	32 }

	33

	34 std::ostream& operator<<(std::ostream& os,

	35 const ::i18n::phonenumbers::PhoneNumber& n) {

	36 os << "country_code: " << n.country_code() << " "

	37 << "national_number: " << n.national_number();

	38 if (n.has_extension())

	39 os << " extension: \"" << n.extension() << "\"";

	40 if (n.has_italian_leading_zero())

	41 os << " italian_leading_zero: " << n.italian_leading_zero();

	42 if (n.has_number_of_leading_zeros())

	43 os << " number_of_leading_zeros: " << n.number_of_leading_zeros();

	44 if (n.has_raw_input())

	45 os << " raw_input: \"" << n.raw_input() << "\"";

	46 return os;

	47 }

	48

23 } // namespace	49 } // namespace

24	50

25 AutofillProfileComparator::AutofillProfileComparator(	51 AutofillProfileComparator::AutofillProfileComparator(

26 const base::StringPiece& app_locale)	52 const base::StringPiece& app_locale)

27 : app_locale_(app_locale.data(), app_locale.size()) {	53 : app_locale_(app_locale.data(), app_locale.size()) {

28 // Use ICU transliteration to remove diacritics and fold case.	54 // Use ICU transliteration to remove diacritics and fold case.

29 // See http://userguide.icu-project.org/transforms/general	55 // See http://userguide.icu-project.org/transforms/general

30 UErrorCode status = U_ZERO_ERROR;	56 UErrorCode status = U_ZERO_ERROR;

31 std::unique_ptr<icu::Transliterator> transliterator(	57 std::unique_ptr<icu::Transliterator> transliterator(

32 icu::Transliterator::createInstance(	58 icu::Transliterator::createInstance(

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,	132 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,

107 const AutofillProfile& p2) const {	133 const AutofillProfile& p2) const {

108 // Sorted in order to relative expense of the tests to fail early and cheaply	134 // Sorted in order to relative expense of the tests to fail early and cheaply

109 // if possible.	135 // if possible.

110 return HaveMergeableEmailAddresses(p1, p2) &&	136 return HaveMergeableEmailAddresses(p1, p2) &&

111 HaveMergeableCompanyNames(p1, p2) &&	137 HaveMergeableCompanyNames(p1, p2) &&

112 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) &&	138 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) &&

113 HaveMergeableAddresses(p1, p2);	139 HaveMergeableAddresses(p1, p2);

114 }	140 }

115	141

	142 bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1,

	143 const AutofillProfile& p2,

	144 NameInfo* name_info) const {

	145 DCHECK(HaveMergeableNames(p1, p2));

	146

	147 const AutofillType kFullName(NAME_FULL);

	148 const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_);

	149 const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_);

	150 const base::string16& normalized_full_name_1 =

	151 NormalizeForComparison(full_name_1);

	152 const base::string16& normalized_full_name_2 =

	153 NormalizeForComparison(full_name_2);

	154

	155 const base::string16* best_name = nullptr;

	156 if (normalized_full_name_1.empty()) {

	157 // p1 has no name, so use the name from p2.

	158 best_name = &full_name_2;

	159 } else if (normalized_full_name_2.empty()) {

	160 // p2 has no name, so use the name from p1.

	161 best_name = &full_name_1;

	162 } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) {

	163 // full_name_2 is a variant of full_name_1.

	164 best_name = &full_name_1;

	165 } else {

	166 // If the assertion that p1 and p2 have mergeable names is true, then

	167 // full_name_1 must be a name variant of full_name_2;

	168 best_name = &full_name_2;

	169 }

	170

	171 name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_);

	172 return true;

	173 }

	174

	175 bool AutofillProfileComparator::MergeEmailAddresses(

	176 const AutofillProfile& p1,

	177 const AutofillProfile& p2,

	178 EmailInfo* email_info) const {

	179 DCHECK(HaveMergeableEmailAddresses(p1, p2));

	180

	181 const AutofillType kEmailAddress(EMAIL_ADDRESS);

	182 const base::string16& e1 = p1.GetInfo(kEmailAddress, app_locale_);

	183 const base::string16& e2 = p2.GetInfo(kEmailAddress, app_locale_);

	184 const base::string16* best = nullptr;

	185

	186 if (e1.empty()) {

	187 best = &e2;

	188 } else if (e2.empty()) {

	189 best = &e1;

	190 } else {

	191 best = p1.use_date() > p2.use_date() ? &e1 : &e2;

	192 }

	193

	194 email_info->SetInfo(kEmailAddress, *best, app_locale_);

	195 return true;

	196 }

	197

	198 bool AutofillProfileComparator::MergeCompanyNames(

	199 const AutofillProfile& p1,

	200 const AutofillProfile& p2,

	201 CompanyInfo* company_info) const {

	202 const AutofillType kCompanyName(COMPANY_NAME);

	203 const base::string16& c1 = p1.GetInfo(kCompanyName, app_locale_);

	204 const base::string16& c2 = p2.GetInfo(kCompanyName, app_locale_);

	205 const base::string16* best = nullptr;

	206

	207 DCHECK(HaveMergeableCompanyNames(p1, p2))

	208 << "Company names are not mergeable: '" << c1 << "' vs '" << c2 << "'";

	209

	210 CompareTokensResult result =

	211 CompareTokens(NormalizeForComparison(c1), NormalizeForComparison(c2));

	212 switch (result) {

	213 case DIFFERENT_TOKENS:

	214 default:

	215 NOTREACHED();

	216 return false;

	217 case S1_CONTAINS_S2:

	218 best = &c1;

	219 break;

	220 case S2_CONTAINS_S1:

	221 best = &c2;

	222 break;

	223 case SAME_TOKENS:

	224 best = p1.use_date() > p2.use_date() ? &c1 : &c2;

	225 break;

	226 }

	227

	228 company_info->SetInfo(kCompanyName, *best, app_locale_);

	229 return true;

	230 }

	231

	232 bool AutofillProfileComparator::MergePhoneNumbers(

	233 const AutofillProfile& p1,

	234 const AutofillProfile& p2,

	235 PhoneNumber* phone_number) const {

	236 const ServerFieldType kWholePhoneNumber = PHONE_HOME_WHOLE_NUMBER;

	237 const base::string16& s1 = p1.GetRawInfo(kWholePhoneNumber);

	238 const base::string16& s2 = p2.GetRawInfo(kWholePhoneNumber);

	239

	240 DCHECK(HaveMergeablePhoneNumbers(p1, p2))

	241 << "Phone numbers are not mergeable: '" << s1 << "' vs '" << s2 << "'";

	242

	243 if (s1.empty()) {

	244 phone_number->SetRawInfo(kWholePhoneNumber, s2);

	245 return true;

	246 }

	247

	248 if (s2.empty() \|\| s1 == s2) {

	249 phone_number->SetRawInfo(kWholePhoneNumber, s1);

	250 return true;

	251 }

	252

	253 // Figure out a country code hint.

	254 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

	255 std::string region = UTF16ToUTF8(GetNonEmptyOf(p1, p2, kCountryCode));

	256 if (region.empty())

	257 region = AutofillCountry::CountryCodeForLocale(app_locale_);

	258

	259 // Parse the phone numbers.

	260 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();

	261

	262 ::i18n::phonenumbers::PhoneNumber n1;

	263 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s1), region, &n1) !=

	264 PhoneNumberUtil::NO_PARSING_ERROR) {

	265 return false;

	266 }

	267

	268 ::i18n::phonenumbers::PhoneNumber n2;

	269 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s2), region, &n2) !=

	270 PhoneNumberUtil::NO_PARSING_ERROR) {

	271 return false;

	272 }

	273

	274 ::i18n::phonenumbers::PhoneNumber merged_number;

	275 DCHECK_EQ(n1.country_code(), n2.country_code());

	276 merged_number.set_country_code(n1.country_code());

	277 merged_number.set_national_number(

	278 std::max(n1.national_number(), n2.national_number()));

	279 if (n1.has_extension() && !n1.extension().empty()) {

	280 merged_number.set_extension(n1.extension());

	281 } else if (n2.has_extension() && !n2.extension().empty()) {

	282 merged_number.set_extension(n2.extension());

	283 }

	284 if (n1.has_italian_leading_zero() \|\| n2.has_italian_leading_zero()) {

	285 merged_number.set_italian_leading_zero(n1.italian_leading_zero() \|\|

	286 n2.italian_leading_zero());

	287 }

	288 if (n1.has_number_of_leading_zeros() \|\| n2.has_number_of_leading_zeros()) {

	289 merged_number.set_number_of_leading_zeros(

	290 std::max(n1.number_of_leading_zeros(), n2.number_of_leading_zeros()));

	291 }

	292

	293 PhoneNumberUtil::PhoneNumberFormat format =

	294 region.empty() ? PhoneNumberUtil::NATIONAL
	tmartino 2016/06/27 17:56:46: It seems like region.empty() can't ever be true here, because we do the same check and populate with a default value on lines 256-257.
	Roger McFarlane (Chromium) 2016/06/28 17:26:47: I wasn't assuming that the default was guaranteed to be non-empty; but, yes, you're right, it will probably never be empty. Unless you feel really strongly, though, I'd prefer to leave this as is.
	295 : PhoneNumberUtil::INTERNATIONAL;

	296

	297 std::string new_number;

	298 phone_util->Format(merged_number, format, &new_number);

	299

	300 VLOG(1) << "n1 = {" << n1 << "}";

	301 VLOG(1) << "n2 = {" << n2 << "}";

	302 VLOG(1) << "merged_number = {" << merged_number << "}";

	303 VLOG(1) << "new_number = \"" << new_number << "\"";

	304

	305 // Check if it's a North American number that's missing the area code.

	306 // Libphonenumber doesn't know how to format short numbers; it will still

	307 // include the country code prefix.

	308 if (merged_number.country_code() == 1 &&

	309 merged_number.national_number() <= 9999999 &&

	310 new_number.find("+1") == 0) {

	311 size_t offset = 2; // The char just after "+1".

	312 while (offset < new_number.size() &&

	313 base::IsAsciiWhitespace(new_number[offset])) {

	314 ++offset;

	315 }

	316 new_number = new_number.substr(offset);

	317 }

	318

	319 phone_number->SetRawInfo(kWholePhoneNumber, UTF8ToUTF16(new_number));

	320

	321 return true;

	322 }

	323

	324 bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1,

	325 const AutofillProfile& p2,

	326 Address* address) const {

	327 DCHECK(HaveMergeableAddresses(p1, p2));

	328

	329 // One of the countries is empty or they are the same modulo case, so we just

	330 // have to find the non-empty one, if any.

	331 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

	332 address->SetInfo(kCountryCode,

	333 base::i18n::ToUpper(GetNonEmptyOf(p1, p2, kCountryCode)),

	334 app_locale_);

	335

	336 // One of the zip codes is empty, they are the same, or one is a substring

	337 // of the other. So, we have to find the longest one.

	338 const AutofillType kZipCode(ADDRESS_HOME_ZIP);

	339 const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_);

	340 const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_);

	341 address->SetInfo(kZipCode, (zip1.size() > zip2.size() ? zip1 : zip2),

	342 app_locale_);

	343

	344 // One of the states is empty or one of the states has a subset of tokens from

	345 // the other. Pick the non-empty state that is shorter. This is usually the

	346 // abbreviated one.

	347 const AutofillType kState(ADDRESS_HOME_STATE);

	348 const base::string16& state1 = p1.GetInfo(kState, app_locale_);

	349 const base::string16& state2 = p2.GetInfo(kState, app_locale_);

	350 if (state1.empty()) {

	351 address->SetInfo(kState, state2, app_locale_);

	352 } else if (state2.empty()) {

	353 address->SetInfo(kState, state1, app_locale_);

	354 } else {

	355 address->SetInfo(kState, (state1.size() < state2.size() ? state1 : state2),

	356 app_locale_);

	357 }

	358

	359 // One of the cities is empty or one of the cities has a subset of tokens from

	360 // the other. Pick the city name with more tokens; this is usually the most

	361 // explicit one.

	362 const AutofillType kCity(ADDRESS_HOME_CITY);

	363 const base::string16& city1 = p1.GetInfo(kCity, app_locale_);

	364 const base::string16& city2 = p2.GetInfo(kCity, app_locale_);

	365 if (city1.empty()) {

	366 address->SetInfo(kCity, city2, app_locale_);

	367 } else if (city2.empty()) {

	368 address->SetInfo(kCity, city1, app_locale_);

	369 } else {

	370 // Prefer the one with more tokens.

	371 CompareTokensResult result = CompareTokens(NormalizeForComparison(city1),

	372 NormalizeForComparison(city2));

	373 switch (result) {

	374 case SAME_TOKENS:

	375 // They have the same set of unique tokens. Let's pick the one that's

	376 // longer.

	377 address->SetInfo(kCity, (city1.size() > city2.size() ? city1 : city2),

	378 app_locale_);

	379 break;

	380 case S1_CONTAINS_S2:

	381 // city1 has more unique tokens than city2.

	382 address->SetInfo(kCity, city1, app_locale_);

	383 break;

	384 case S2_CONTAINS_S1:

	385 // city2 has more unique tokens than city1.

	386 address->SetInfo(kCity, city2, app_locale_);

	387 break;

	388 case DIFFERENT_TOKENS:

	389 default:

	390 // The addresses aren't mergeable and we shouldn't be doing any of

	391 // this.

	392 NOTREACHED();

	393 return false;

	394 }

	395 }

	396

	397 // One of the addresses is empty or one of the addresses has a subset of

	398 // tokens from the other. Pick the non-em that is shorter. This is usually the
	tmartino 2016/06/27 17:56:46: This says pick the shorter, but the logic all seems to be picking the longer.
	Roger McFarlane (Chromium) 2016/06/28 17:26:46: Done.
	399 // abbreviated one.

	400 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS);

	401 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_);

	402 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_);

	403 // If one of the addresses is empty then use the other.

	404 if (address1.empty()) {

	405 address->SetInfo(kStreetAddress, address2, app_locale_);

	406 } else if (address2.empty()) {

	407 address->SetInfo(kStreetAddress, address1, app_locale_);

	408 } else {

	409 // Prefer the multi-line address if one is multi-line and the other isn't.

	410 bool address1_multiline = ContainsNewline(address1);

	411 bool address2_multiline = ContainsNewline(address2);

	412 if (address1_multiline && !address2_multiline) {

	413 address->SetInfo(kStreetAddress, address1, app_locale_);

	414 } else if (address2_multiline && !address1_multiline) {

	415 address->SetInfo(kStreetAddress, address2, app_locale_);

	416 } else {

	417 // Prefer the one with more tokens if they're both single-line or both

	418 // multi-line addresses.

	419 CompareTokensResult result = CompareTokens(

	420 NormalizeForComparison(address1), NormalizeForComparison(address2));

	421 switch (result) {

	422 case SAME_TOKENS:

	423 // They have the same set of unique tokens. Let's pick the one that's

	424 // longer.

	425 address->SetInfo(

	426 kStreetAddress,

	427 (address1.size() > address2.size() ? address1 : address2),

	428 app_locale_);

	429 break;

	430 case S1_CONTAINS_S2:

	431 // address1 has more unique tokens than address2.

	432 address->SetInfo(kStreetAddress, address1, app_locale_);

	433 break;

	434 case S2_CONTAINS_S1:

	435 // address2 has more unique tokens than address1.

	436 address->SetInfo(kStreetAddress, address1, app_locale_);

	437 break;

	438 case DIFFERENT_TOKENS:

	439 default:

	440 // The addresses aren't mergeable and we shouldn't be doing any of

	441 // this.

	442 NOTREACHED();

	443 return false;

	444 }

	445 }

	446 }

	447 return true;

	448 }

	449

116 // static	450 // static

117 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(	451 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(

118 base::StringPiece16 s) {	452 base::StringPiece16 s) {

119 std::vector<base::StringPiece16> tokens = base::SplitStringPiece(	453 std::vector<base::StringPiece16> tokens = base::SplitStringPiece(

120 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);	454 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

121 return std::set<base::StringPiece16>(tokens.begin(), tokens.end());	455 return std::set<base::StringPiece16>(tokens.begin(), tokens.end());

122 }	456 }

123	457

124 // static	458 // static

125 bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1,	459 AutofillProfileComparator::CompareTokensResult

126 base::StringPiece16 s2) {	460 AutofillProfileComparator::CompareTokens(base::StringPiece16 s1,

	461 base::StringPiece16 s2) {

	462 // Note: std::include() expects the items in each range to be in sorted order,

	463 // hence the use of std::set<> instead of std::unordered_set<>.

127 std::set<base::StringPiece16> t1 = UniqueTokens(s1);	464 std::set<base::StringPiece16> t1 = UniqueTokens(s1);

128 std::set<base::StringPiece16> t2 = UniqueTokens(s2);	465 std::set<base::StringPiece16> t2 = UniqueTokens(s2);

129	466

130 // Note: std::include() expects the items in each range to be in sorted order,	467 // Does s1 contains all of the tokens in s2? As a special case, return 0 if

131 // hence the use of std::set<> instead of std::unordered_set<>.	468 // the two sets are exactly the samel.
	tmartino 2016/06/27 17:56:46: nit: /s/samel/same
	Roger McFarlane (Chromium) 2016/06/28 17:26:46: Done.
132 return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) \|\|	469 if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()))

133 std::includes(t2.begin(), t2.end(), t1.begin(), t1.end());	470 return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2;

	471

	472 // Does s2 contain all of the tokens in s1?

	473 if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()))

	474 return S2_CONTAINS_S1;

	475

	476 // Neither string contains all of the tokens from the other.

	477 return DIFFERENT_TOKENS;

	478 }

	479

	480 base::string16 AutofillProfileComparator::GetNonEmptyOf(

	481 const AutofillProfile& p1,

	482 const AutofillProfile& p2,

	483 AutofillType t) const {

	484 const base::string16& s1 = p1.GetInfo(t, app_locale_);

	485 if (!s1.empty())

	486 return s1;

	487 return p2.GetInfo(t, app_locale_);

134 }	488 }

135	489

136 // static	490 // static

137 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(	491 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(

138 const base::string16& name_part) {	492 const base::string16& name_part) {

139 const size_t kMaxSupportedSubNames = 8;	493 const size_t kMaxSupportedSubNames = 8;

140	494

141 std::vector<base::string16> sub_names = base::SplitString(	495 std::vector<base::string16> sub_names = base::SplitString(

142 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);	496 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

143	497

144 // Limit the number of sub-names we support (to constrain memory usage);	498 // Limit the number of sub-names we support (to constrain memory usage);

145 if (sub_names.size() > kMaxSupportedSubNames)	499 if (sub_names.size() > kMaxSupportedSubNames)

146 return {name_part};	500 return {name_part};

147	501

148 // Start with the empty string as a variant.	502 // Start with the empty string as a variant.

149 std::set<base::string16> variants = {base::EmptyString16()};	503 std::set<base::string16> variants = {base::EmptyString16()};

150	504

151 // For each sub-name, add a variant of all the already existing variants that	505 // For each sub-name, add a variant of all the already existing variants that

152 // appends this sub-name and one that appends the initial of this sub-name.	506 // appends this sub-name and one that appends the initial of this sub-name.

153 // Duplicates will be discarded when they're added to the variants set.	507 // Duplicates will be discarded when they're added to the variants set.

154 for (const base::string16& sub_name : sub_names) {	508 for (const base::string16& sub_name : sub_names) {

155 if (sub_name.empty()) continue;	509 if (sub_name.empty())

	510 continue;

156 std::vector<base::string16> new_variants;	511 std::vector<base::string16> new_variants;

157 for (const base::string16& variant : variants) {	512 for (const base::string16& variant : variants) {

158 new_variants.push_back(base::CollapseWhitespace(	513 new_variants.push_back(base::CollapseWhitespace(

159 base::JoinString({variant, sub_name}, kSpace), true));	514 base::JoinString({variant, sub_name}, kSpace), true));

160 new_variants.push_back(base::CollapseWhitespace(	515 new_variants.push_back(base::CollapseWhitespace(

161 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true));	516 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true));

162 }	517 }

163 variants.insert(new_variants.begin(), new_variants.end());	518 variants.insert(new_variants.begin(), new_variants.end());

164 }	519 }

165	520

166 // As a common case, also add the variant that just concatenates all of the	521 // As a common case, also add the variant that just concatenates all of the

167 // initials.	522 // initials.

168 base::string16 initials;	523 base::string16 initials;

169 for (const base::string16& sub_name : sub_names) {	524 for (const base::string16& sub_name : sub_names) {

170 if (sub_name.empty()) continue;	525 if (sub_name.empty())

	526 continue;

171 initials.push_back(sub_name[0]);	527 initials.push_back(sub_name[0]);

172 }	528 }

173 variants.insert(initials);	529 variants.insert(initials);

174	530

175 // And, we're done.	531 // And, we're done.

176 return variants;	532 return variants;

177 }	533 }

178	534

179 bool AutofillProfileComparator::IsNameVariantOf(	535 bool AutofillProfileComparator::IsNameVariantOf(

180 const base::string16& full_name_1,	536 const base::string16& full_name_1,

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
246 }	602 }

247	603

248 bool AutofillProfileComparator::HaveMergeableCompanyNames(	604 bool AutofillProfileComparator::HaveMergeableCompanyNames(

249 const AutofillProfile& p1,	605 const AutofillProfile& p1,

250 const AutofillProfile& p2) const {	606 const AutofillProfile& p2) const {

251 const base::string16& company_name_1 = NormalizeForComparison(	607 const base::string16& company_name_1 = NormalizeForComparison(

252 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_));	608 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_));

253 const base::string16& company_name_2 = NormalizeForComparison(	609 const base::string16& company_name_2 = NormalizeForComparison(

254 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));	610 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));

255 return company_name_1.empty() \|\| company_name_2.empty() \|\|	611 return company_name_1.empty() \|\| company_name_2.empty() \|\|

256 HaveSameTokens(company_name_1, company_name_2);	612 CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS;

257 }	613 }

258	614

259 bool AutofillProfileComparator::HaveMergeablePhoneNumbers(	615 bool AutofillProfileComparator::HaveMergeablePhoneNumbers(

260 const AutofillProfile& p1,	616 const AutofillProfile& p1,

261 const AutofillProfile& p2) const {	617 const AutofillProfile& p2) const {

262 // We work with the raw phone numbers to avoid losing any helpful information	618 // We work with the raw phone numbers to avoid losing any helpful information

263 // as we parse.	619 // as we parse.

264 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);	620 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);

265 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);	621 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);

266	622

267 // Are the two phone numbers trivially mergeable?	623 // Are the two phone numbers trivially mergeable?

268 if (raw_phone_1.empty() \|\| raw_phone_2.empty() \|\|	624 if (raw_phone_1.empty() \|\| raw_phone_2.empty() \|\|

269 raw_phone_1 == raw_phone_2) {	625 raw_phone_1 == raw_phone_2) {

270 return true;	626 return true;

271 }	627 }

272	628

273 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support	629 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support

274 // SHORT_NSN_MATCH and just call that instead of accessing the underlying	630 // SHORT_NSN_MATCH and just call that instead of accessing the underlying

275 // utility library directly?	631 // utility library directly?

276	632

277 // The phone number util library needs the numbers in utf8.	633 // The phone number util library needs the numbers in utf8.

278 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1);	634 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1);

279 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2);	635 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2);

280	636

281 // Parse and compare the phone numbers.	637 // Parse and compare the phone numbers.

282 using ::i18n::phonenumbers::PhoneNumberUtil;

283 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();	638 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();

284 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) {	639 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) {

285 case PhoneNumberUtil::INVALID_NUMBER:	640 case PhoneNumberUtil::INVALID_NUMBER:

286 case PhoneNumberUtil::NO_MATCH:	641 case PhoneNumberUtil::NO_MATCH:

287 return false;	642 return false;

288 case PhoneNumberUtil::SHORT_NSN_MATCH:	643 case PhoneNumberUtil::SHORT_NSN_MATCH:

289 case PhoneNumberUtil::NSN_MATCH:	644 case PhoneNumberUtil::NSN_MATCH:

290 case PhoneNumberUtil::EXACT_MATCH:	645 case PhoneNumberUtil::EXACT_MATCH:

291 return true;	646 return true;

292 }	647 }

293	648

294 NOTREACHED();	649 NOTREACHED();

295 return false;	650 return false;

296 }	651 }

297	652

298 bool AutofillProfileComparator::HaveMergeableAddresses(	653 bool AutofillProfileComparator::HaveMergeableAddresses(

299 const AutofillProfile& p1,	654 const AutofillProfile& p1,

300 const AutofillProfile& p2) const {	655 const AutofillProfile& p2) const {

301 // If the address are not in the same country, then they're not the same. If	656 // If the address are not in the same country, then they're not the same. If

302 // one of the address countries is unknown/invalid the comparison continues.	657 // one of the address countries is unknown/invalid the comparison continues.

303 const base::string16& country1 = p1.GetInfo(	658 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

304 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);	659 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);

305 const base::string16& country2 = p2.GetInfo(	660 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_);

306 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);

307 if (!country1.empty() && !country2.empty() &&	661 if (!country1.empty() && !country2.empty() &&

308 !case_insensitive_compare_.StringsEqual(country1, country2)) {	662 !case_insensitive_compare_.StringsEqual(country1, country2)) {

309 return false;	663 return false;

310 }	664 }

311	665

312 // TODO(rogerm): Lookup the normalization rules for the (common) country of	666 // TODO(rogerm): Lookup the normalization rules for the (common) country of

313 // the address. The rules should be applied post NormalizeForComparison to	667 // the address. The rules should be applied post NormalizeForComparison to

314 // the state, city, and address bag of words comparisons.	668 // the state, city, and address bag of words comparisons.

315	669

316 // Zip	670 // Zip

317 // ----	671 // ----

318 // If the addresses are definitely not in the same zip/area code then we're	672 // If the addresses are definitely not in the same zip/area code then we're

319 // done. Otherwise,the comparison continues.	673 // done. Otherwise,the comparison continues.

	674 const AutofillType kZipCode(ADDRESS_HOME_ZIP);

320 const base::string16& zip1 = NormalizeForComparison(	675 const base::string16& zip1 = NormalizeForComparison(

321 p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),	676 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

322 DISCARD_WHITESPACE);

323 const base::string16& zip2 = NormalizeForComparison(	677 const base::string16& zip2 = NormalizeForComparison(

324 p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),	678 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

325 DISCARD_WHITESPACE);

326 if (!zip1.empty() && !zip2.empty() &&	679 if (!zip1.empty() && !zip2.empty() &&

327 zip1.find(zip2) == base::string16::npos &&	680 zip1.find(zip2) == base::string16::npos &&

328 zip2.find(zip1) == base::string16::npos) {	681 zip2.find(zip1) == base::string16::npos) {

329 return false;	682 return false;

330 }	683 }

331	684

332 // State	685 // State

333 // ------	686 // ------

334 // Heuristic: If the match is between non-empty zip codes then we can infer	687 // Heuristic: States are mergeable if one is a (possibly empty) bag of words

	688 // subset of the other.
	tmartino 2016/06/27 17:56:46 It looks to me like we aren't handling abbreviations here? So "MN" and "Minnesota" would be different states? Also, does the bag-of-words subset heuristic handle a case like "West Virginia" and "Virginia"? Roger McFarlane (Chromium) 2016/06/28 17:26:47 On 2016/06/27 17:56:46, tmartino wrote: > It looks to me like we aren't handling abbreviations here? So "MN" and > "Minnesota" would be different states? > > Also, does the bag-of-words subset heuristic handle a case like "West Virginia" > and "Virginia"? Added provisional support for state (US).
	689 //

	690 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

335 // that the two state strings are intended to have the same meaning. This	691 // that the two state strings are intended to have the same meaning. This

336 // handles the cases where we have invalid or poorly formed data in one of the	692 // handles the cases where we have invalid or poorly formed data in one of the

337 // state values (like "Select one", or "CA - California"). Otherwise, we	693 // state values (like "Select one", or "CA - California").

338 // actually have to check if the states map to the same set of tokens.	694 const AutofillType kState(ADDRESS_HOME_STATE);

339 const base::string16& state1 = NormalizeForComparison(	695 const base::string16& state1 =

340 p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));	696 NormalizeForComparison(p1.GetInfo(kState, app_locale_));

341 const base::string16& state2 = NormalizeForComparison(	697 const base::string16& state2 =

342 p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));	698 NormalizeForComparison(p2.GetInfo(kState, app_locale_));

343 if ((zip1.empty() \|\| zip2.empty()) && !HaveSameTokens(state1, state2)) {	699 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) {

344 return false;	700 return false;

345 }	701 }

346	702

347 // City	703 // City

348 // ------	704 // ------

349 // Heuristic: If the match is between non-empty zip codes then we can infer	705 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words

	706 // subset of the other.

	707 //

	708 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

350 // that the two city strings are intended to have the same meaning. This	709 // that the two city strings are intended to have the same meaning. This

351 // handles the cases where we have a city vs one of its suburbs. Otherwise, we	710 // handles the cases where we have a city vs one of its suburbs.

352 // actually have to check if the cities map to the same set of tokens.

353 const base::string16& city1 = NormalizeForComparison(	711 const base::string16& city1 = NormalizeForComparison(

354 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	712 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

355 const base::string16& city2 = NormalizeForComparison(	713 const base::string16& city2 = NormalizeForComparison(

356 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	714 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

357 if ((zip1.empty() \|\| zip2.empty()) && !HaveSameTokens(city1, city2)) {	715 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {

358 return false;	716 return false;

359 }	717 }

360	718

361 // Address	719 // Address

362 // --------	720 // --------

363 // Heuristic: Use bag of words comparison on the post-normalized addresses.	721 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag

	722 // of words subset of the other.

364 const base::string16& address1 = NormalizeForComparison(	723 const base::string16& address1 = NormalizeForComparison(

365 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	724 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

366 const base::string16& address2 = NormalizeForComparison(	725 const base::string16& address2 = NormalizeForComparison(

367 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	726 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

368 if (!HaveSameTokens(address1, address2)) {	727 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {

369 return false;	728 return false;

370 }	729 }

371	730

372 return true;	731 return true;

373 }	732 }

374	733

375 } // namespace autofill	734 } // namespace autofill

OLD	NEW