components/autofill/core/browser/autofill_profile_comparator.cc - Issue 2088443002: Expand autofill profile merge logic.

Side by Side Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2088443002: Expand autofill profile merge logic. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Tommy's comments Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « components/autofill/core/browser/autofill_profile_comparator.h ('k') | components/autofill/core/browser/autofill_profile_comparator_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/autofill/core/browser/autofill_profile_comparator.h"	5 #include "components/autofill/core/browser/autofill_profile_comparator.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <vector>	8 #include <vector>

9	9

	10 #include "base/i18n/case_conversion.h"

10 #include "base/i18n/char_iterator.h"	11 #include "base/i18n/char_iterator.h"

	12 #include "base/strings/string_piece.h"

11 #include "base/strings/string_split.h"	13 #include "base/strings/string_split.h"

12 #include "base/strings/string_util.h"	14 #include "base/strings/string_util.h"

13 #include "base/strings/utf_string_conversion_utils.h"	15 #include "base/strings/utf_string_conversion_utils.h"

14 #include "base/strings/utf_string_conversions.h"	16 #include "base/strings/utf_string_conversions.h"

	17 #include "components/autofill/core/browser/autofill_country.h"

15 #include "components/autofill/core/browser/autofill_data_util.h"	18 #include "components/autofill/core/browser/autofill_data_util.h"

	19 #include "components/autofill/core/browser/state_names.h"

16 #include "third_party/libphonenumber/phonenumber_api.h"	20 #include "third_party/libphonenumber/phonenumber_api.h"

17	21

	22 using i18n::phonenumbers::PhoneNumberUtil;

	23 using base::UTF16ToUTF8;

	24 using base::UTF8ToUTF16;

	25

18 namespace autofill {	26 namespace autofill {

19 namespace {	27 namespace {

20	28

21 const base::char16 kSpace[] = {L' ', L'\0'};	29 const base::char16 kSpace[] = {L' ', L'\0'};

	30 const base::char16 kUS[] = {L'U', L'S', L'\0'};

	31

	32 bool ContainsNewline(base::StringPiece16 text) {

	33 return text.find('\n') != base::StringPiece16::npos;

	34 }

	35

	36 std::ostream& operator<<(std::ostream& os,

	37 const ::i18n::phonenumbers::PhoneNumber& n) {

	38 os << "country_code: " << n.country_code() << " "

	39 << "national_number: " << n.national_number();

	40 if (n.has_extension())

	41 os << " extension: \"" << n.extension() << "\"";

	42 if (n.has_italian_leading_zero())

	43 os << " italian_leading_zero: " << n.italian_leading_zero();

	44 if (n.has_number_of_leading_zeros())

	45 os << " number_of_leading_zeros: " << n.number_of_leading_zeros();

	46 if (n.has_raw_input())

	47 os << " raw_input: \"" << n.raw_input() << "\"";

	48 return os;

	49 }

22	50

23 } // namespace	51 } // namespace

24	52

25 AutofillProfileComparator::AutofillProfileComparator(	53 AutofillProfileComparator::AutofillProfileComparator(

26 const base::StringPiece& app_locale)	54 const base::StringPiece& app_locale)

27 : app_locale_(app_locale.data(), app_locale.size()) {	55 : app_locale_(app_locale.data(), app_locale.size()) {

28 // Use ICU transliteration to remove diacritics and fold case.	56 // Use ICU transliteration to remove diacritics and fold case.

29 // See http://userguide.icu-project.org/transforms/general	57 // See http://userguide.icu-project.org/transforms/general

30 UErrorCode status = U_ZERO_ERROR;	58 UErrorCode status = U_ZERO_ERROR;

31 std::unique_ptr<icu::Transliterator> transliterator(	59 std::unique_ptr<icu::Transliterator> transliterator(

(...skipping 74 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,	134 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,

107 const AutofillProfile& p2) const {	135 const AutofillProfile& p2) const {

108 // Sorted in order of relative expense of the tests to fail early and cheaply	136 // Sorted in order of relative expense of the tests to fail early and cheaply

109 // if possible.	137 // if possible.

110 return HaveMergeableEmailAddresses(p1, p2) &&	138 return HaveMergeableEmailAddresses(p1, p2) &&

111 HaveMergeableCompanyNames(p1, p2) &&	139 HaveMergeableCompanyNames(p1, p2) &&

112 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) &&	140 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) &&

113 HaveMergeableAddresses(p1, p2);	141 HaveMergeableAddresses(p1, p2);

114 }	142 }

115	143

	144 bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1,

	145 const AutofillProfile& p2,

	146 NameInfo* name_info) const {

	147 DCHECK(HaveMergeableNames(p1, p2));

	148

	149 const AutofillType kFullName(NAME_FULL);

	150 const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_);

	151 const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_);

	152 const base::string16& normalized_full_name_1 =

	153 NormalizeForComparison(full_name_1);

	154 const base::string16& normalized_full_name_2 =

	155 NormalizeForComparison(full_name_2);

	156

	157 const base::string16* best_name = nullptr;

	158 if (normalized_full_name_1.empty()) {

	159 // p1 has no name, so use the name from p2.

	160 best_name = &full_name_2;

	161 } else if (normalized_full_name_2.empty()) {

	162 // p2 has no name, so use the name from p1.

	163 best_name = &full_name_1;

	164 } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) {

	165 // full_name_2 is a variant of full_name_1.

	166 best_name = &full_name_1;

	167 } else {

	168 // If the assertion that p1 and p2 have mergeable names is true, then

	169 // full_name_1 must be a name variant of full_name_2;

	170 best_name = &full_name_2;

	171 }

	172

	173 name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_);

	174 return true;

	175 }

	176

	177 bool AutofillProfileComparator::MergeEmailAddresses(

	178 const AutofillProfile& p1,

	179 const AutofillProfile& p2,

	180 EmailInfo* email_info) const {

	181 DCHECK(HaveMergeableEmailAddresses(p1, p2));

	182

	183 const AutofillType kEmailAddress(EMAIL_ADDRESS);

	184 const base::string16& e1 = p1.GetInfo(kEmailAddress, app_locale_);

	185 const base::string16& e2 = p2.GetInfo(kEmailAddress, app_locale_);

	186 const base::string16* best = nullptr;

	187

	188 if (e1.empty()) {

	189 best = &e2;

	190 } else if (e2.empty()) {

	191 best = &e1;

	192 } else {

	193 best = p2.use_date() > p1.use_date() ? &e2 : &e1;

	194 }

	195

	196 email_info->SetInfo(kEmailAddress, *best, app_locale_);

	197 return true;

	198 }

	199

	200 bool AutofillProfileComparator::MergeCompanyNames(

	201 const AutofillProfile& p1,

	202 const AutofillProfile& p2,

	203 CompanyInfo* company_info) const {

	204 const AutofillType kCompanyName(COMPANY_NAME);

	205 const base::string16& c1 = p1.GetInfo(kCompanyName, app_locale_);

	206 const base::string16& c2 = p2.GetInfo(kCompanyName, app_locale_);

	207 const base::string16* best = nullptr;

	208

	209 DCHECK(HaveMergeableCompanyNames(p1, p2))

	210 << "Company names are not mergeable: '" << c1 << "' vs '" << c2 << "'";

	211

	212 CompareTokensResult result =

	213 CompareTokens(NormalizeForComparison(c1), NormalizeForComparison(c2));

	214 switch (result) {

	215 case DIFFERENT_TOKENS:

	216 default:

	217 NOTREACHED();

	218 return false;

	219 case S1_CONTAINS_S2:

	220 best = &c1;

	221 break;

	222 case S2_CONTAINS_S1:

	223 best = &c2;

	224 break;

	225 case SAME_TOKENS:

	226 best = p2.use_date() > p1.use_date() ? &c2 : &c1;

	227 break;

	228 }

	229

	230 company_info->SetInfo(kCompanyName, *best, app_locale_);

	231 return true;

	232 }

	233

	234 bool AutofillProfileComparator::MergePhoneNumbers(

	235 const AutofillProfile& p1,

	236 const AutofillProfile& p2,

	237 PhoneNumber* phone_number) const {

	238 const ServerFieldType kWholePhoneNumber = PHONE_HOME_WHOLE_NUMBER;

	239 const base::string16& s1 = p1.GetRawInfo(kWholePhoneNumber);

	240 const base::string16& s2 = p2.GetRawInfo(kWholePhoneNumber);

	241

	242 DCHECK(HaveMergeablePhoneNumbers(p1, p2))

	243 << "Phone numbers are not mergeable: '" << s1 << "' vs '" << s2 << "'";

	244

	245 if (s1.empty()) {

	246 phone_number->SetRawInfo(kWholePhoneNumber, s2);

	247 return true;

	248 }

	249

	250 if (s2.empty() \|\| s1 == s2) {

	251 phone_number->SetRawInfo(kWholePhoneNumber, s1);

	252 return true;

	253 }

	254

	255 // Figure out a country code hint.

	256 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

	257 std::string region = UTF16ToUTF8(GetNonEmptyOf(p1, p2, kCountryCode));

	258 if (region.empty())

	259 region = AutofillCountry::CountryCodeForLocale(app_locale_);

	260

	261 // Parse the phone numbers.

	262 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();

	263

	264 ::i18n::phonenumbers::PhoneNumber n1;

	265 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s1), region, &n1) !=

	266 PhoneNumberUtil::NO_PARSING_ERROR) {

	267 return false;

	268 }

	269

	270 ::i18n::phonenumbers::PhoneNumber n2;

	271 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s2), region, &n2) !=

	272 PhoneNumberUtil::NO_PARSING_ERROR) {

	273 return false;

	274 }

	275

	276 ::i18n::phonenumbers::PhoneNumber merged_number;

	277 DCHECK_EQ(n1.country_code(), n2.country_code());

	278 merged_number.set_country_code(n1.country_code());

	279 merged_number.set_national_number(

	280 std::max(n1.national_number(), n2.national_number()));

	281 if (n1.has_extension() && !n1.extension().empty()) {

	282 merged_number.set_extension(n1.extension());

	283 } else if (n2.has_extension() && !n2.extension().empty()) {

	284 merged_number.set_extension(n2.extension());

	285 }

	286 if (n1.has_italian_leading_zero() \|\| n2.has_italian_leading_zero()) {

	287 merged_number.set_italian_leading_zero(n1.italian_leading_zero() \|\|

	288 n2.italian_leading_zero());

	289 }

	290 if (n1.has_number_of_leading_zeros() \|\| n2.has_number_of_leading_zeros()) {

	291 merged_number.set_number_of_leading_zeros(

	292 std::max(n1.number_of_leading_zeros(), n2.number_of_leading_zeros()));

	293 }

	294

	295 PhoneNumberUtil::PhoneNumberFormat format =

	296 region.empty() ? PhoneNumberUtil::NATIONAL

	297 : PhoneNumberUtil::INTERNATIONAL;

	298

	299 std::string new_number;

	300 phone_util->Format(merged_number, format, &new_number);

	301

	302 VLOG(1) << "n1 = {" << n1 << "}";

	303 VLOG(1) << "n2 = {" << n2 << "}";

	304 VLOG(1) << "merged_number = {" << merged_number << "}";

	305 VLOG(1) << "new_number = \"" << new_number << "\"";

	306

	307 // Check if it's a North American number that's missing the area code.

	308 // Libphonenumber doesn't know how to format short numbers; it will still

	309 // include the country code prefix.

	310 if (merged_number.country_code() == 1 &&

	311 merged_number.national_number() <= 9999999 &&

	312 new_number.find("+1") == 0) {

	313 size_t offset = 2; // The char just after "+1".

	314 while (offset < new_number.size() &&

	315 base::IsAsciiWhitespace(new_number[offset])) {

	316 ++offset;

	317 }

	318 new_number = new_number.substr(offset);

	319 }

	320

	321 phone_number->SetRawInfo(kWholePhoneNumber, UTF8ToUTF16(new_number));

	322

	323 return true;

	324 }

	325

	326 bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1,

	327 const AutofillProfile& p2,

	328 Address* address) const {

	329 DCHECK(HaveMergeableAddresses(p1, p2));

	330

	331 // One of the countries is empty or they are the same modulo case, so we just

	332 // have to find the non-empty one, if any.

	333 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

	334 const base::string16& country_code =

	335 base::i18n::ToUpper(GetNonEmptyOf(p1, p2, kCountryCode));

	336 address->SetInfo(kCountryCode, country_code, app_locale_);

	337

	338 // One of the zip codes is empty, they are the same, or one is a substring

	339 // of the other. We prefer the most recently used zip code.

	340 const AutofillType kZipCode(ADDRESS_HOME_ZIP);

	341 const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_);

	342 const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_);

	343 if (zip1.empty()) {

	344 address->SetInfo(kZipCode, zip2, app_locale_);

	345 } else if (zip2.empty()) {

	346 address->SetInfo(kZipCode, zip1, app_locale_);

	347 } else {

	348 address->SetInfo(kZipCode, (p2.use_date() > p1.use_date() ? zip2 : zip1),

	349 app_locale_);

	350 }

	351

	352 // One of the states is empty or one of the states has a subset of tokens from

	353 // the other. Pick the non-empty state that is shorter. This is usually the

	354 // abbreviated one.

	355 const AutofillType kState(ADDRESS_HOME_STATE);

	356 const base::string16& state1 = p1.GetInfo(kState, app_locale_);

	357 const base::string16& state2 = p2.GetInfo(kState, app_locale_);

	358 if (state1.empty()) {

	359 address->SetInfo(kState, state2, app_locale_);

	360 } else if (state2.empty()) {

	361 address->SetInfo(kState, state1, app_locale_);

	362 } else {

	363 address->SetInfo(kState, (state2.size() < state1.size() ? state2 : state1),

	364 app_locale_);

	365 }

	366

	367 // One of the cities is empty or one of the cities has a subset of tokens from

	368 // the other. Pick the city name with more tokens; this is usually the most

	369 // explicit one.

	370 const AutofillType kCity(ADDRESS_HOME_CITY);

	371 const base::string16& city1 = p1.GetInfo(kCity, app_locale_);

	372 const base::string16& city2 = p2.GetInfo(kCity, app_locale_);

	373 if (city1.empty()) {

	374 address->SetInfo(kCity, city2, app_locale_);

	375 } else if (city2.empty()) {

	376 address->SetInfo(kCity, city1, app_locale_);

	377 } else {

	378 // Prefer the one with more tokens.

	379 CompareTokensResult result = CompareTokens(NormalizeForComparison(city1),

	380 NormalizeForComparison(city2));

	381 switch (result) {

	382 case SAME_TOKENS:

	383 // They have the same set of unique tokens. Let's pick the more recently

	384 // used one.

	385 address->SetInfo(kCity, (p2.use_date() > p1.use_date() ? city2 : city1),

	386 app_locale_);

	387 break;

	388 case S1_CONTAINS_S2:

	389 // city1 has more unique tokens than city2.

	390 address->SetInfo(kCity, city1, app_locale_);

	391 break;

	392 case S2_CONTAINS_S1:

	393 // city2 has more unique tokens than city1.

	394 address->SetInfo(kCity, city2, app_locale_);

	395 break;

	396 case DIFFERENT_TOKENS:

	397 default:

	398 // The addresses aren't mergeable and we shouldn't be doing any of

	399 // this.

	400 NOTREACHED();

	401 return false;

	402 }

	403 }

	404

	405 // One of the addresses is empty or one of the addresses has a subset of

	406 // tokens from the other. Prefer the more verbosely expressed one.

	407 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS);

	408 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_);

	409 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_);

	410 // If one of the addresses is empty then use the other.

	411 if (address1.empty()) {

	412 address->SetInfo(kStreetAddress, address2, app_locale_);

	413 } else if (address2.empty()) {

	414 address->SetInfo(kStreetAddress, address1, app_locale_);

	415 } else {

	416 // Prefer the multi-line address if one is multi-line and the other isn't.

	417 bool address1_multiline = ContainsNewline(address1);

	418 bool address2_multiline = ContainsNewline(address2);

	419 if (address1_multiline && !address2_multiline) {

	420 address->SetInfo(kStreetAddress, address1, app_locale_);

	421 } else if (address2_multiline && !address1_multiline) {

	422 address->SetInfo(kStreetAddress, address2, app_locale_);

	423 } else {

	424 // Prefer the one with more tokens if they're both single-line or both

	425 // multi-line addresses.

	426 CompareTokensResult result = CompareTokens(

	427 NormalizeForComparison(address1), NormalizeForComparison(address2));

	428 switch (result) {

	429 case SAME_TOKENS:

	430 // They have the same set of unique tokens. Let's pick the more recently

	431 // used one.

	432 address->SetInfo(

	433 kStreetAddress,

	434 (p2.use_date() > p1.use_date() ? address2 : address1),

	435 app_locale_);

	436 break;

	437 case S1_CONTAINS_S2:

	438 // address1 has more unique tokens than address2.

	439 address->SetInfo(kStreetAddress, address1, app_locale_);

	440 break;

	441 case S2_CONTAINS_S1:

	442 // address2 has more unique tokens than address1.

	443 address->SetInfo(kStreetAddress, address1, app_locale_);

	444 break;

	445 case DIFFERENT_TOKENS:

	446 default:

	447 // The addresses aren't mergeable and we shouldn't be doing any of

	448 // this.

	449 NOTREACHED();

	450 return false;

	451 }

	452 }

	453 }

	454 return true;

	455 }

	456

116 // static	457 // static

117 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(	458 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(

118 base::StringPiece16 s) {	459 base::StringPiece16 s) {

119 std::vector<base::StringPiece16> tokens = base::SplitStringPiece(	460 std::vector<base::StringPiece16> tokens = base::SplitStringPiece(

120 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);	461 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

121 return std::set<base::StringPiece16>(tokens.begin(), tokens.end());	462 return std::set<base::StringPiece16>(tokens.begin(), tokens.end());

122 }	463 }

123	464

124 // static	465 // static

125 bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1,	466 AutofillProfileComparator::CompareTokensResult

126 base::StringPiece16 s2) {	467 AutofillProfileComparator::CompareTokens(base::StringPiece16 s1,

	468 base::StringPiece16 s2) {

	469 // Note: std::includes() expects the items in each range to be in sorted order,

	470 // hence the use of std::set<> instead of std::unordered_set<>.

127 std::set<base::StringPiece16> t1 = UniqueTokens(s1);	471 std::set<base::StringPiece16> t1 = UniqueTokens(s1);

128 std::set<base::StringPiece16> t2 = UniqueTokens(s2);	472 std::set<base::StringPiece16> t2 = UniqueTokens(s2);

129	473

130 // Note: std::include() expects the items in each range to be in sorted order,	474 // Does s1 contain all of the tokens in s2? As a special case, return

131 // hence the use of std::set<> instead of std::unordered_set<>.	475 // SAME_TOKENS if the two sets are exactly the same.

132 return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) \|\|	476 if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()))

133 std::includes(t2.begin(), t2.end(), t1.begin(), t1.end());	477 return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2;

	478

	479 // Does s2 contain all of the tokens in s1?

	480 if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()))

	481 return S2_CONTAINS_S1;

	482

	483 // Neither string contains all of the tokens from the other.

	484 return DIFFERENT_TOKENS;

	485 }

	486

	487 base::string16 AutofillProfileComparator::GetNonEmptyOf(

	488 const AutofillProfile& p1,

	489 const AutofillProfile& p2,

	490 AutofillType t) const {

	491 const base::string16& s1 = p1.GetInfo(t, app_locale_);

	492 if (!s1.empty())

	493 return s1;

	494 return p2.GetInfo(t, app_locale_);

134 }	495 }

135	496

136 // static	497 // static

137 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(	498 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(

138 const base::string16& name_part) {	499 const base::string16& name_part) {

139 const size_t kMaxSupportedSubNames = 8;	500 const size_t kMaxSupportedSubNames = 8;

140	501

141 std::vector<base::string16> sub_names = base::SplitString(	502 std::vector<base::string16> sub_names = base::SplitString(

142 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);	503 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

143	504

144 // Limit the number of sub-names we support (to constrain memory usage);	505 // Limit the number of sub-names we support (to constrain memory usage);

145 if (sub_names.size() > kMaxSupportedSubNames)	506 if (sub_names.size() > kMaxSupportedSubNames)

146 return {name_part};	507 return {name_part};

147	508

148 // Start with the empty string as a variant.	509 // Start with the empty string as a variant.

149 std::set<base::string16> variants = {base::EmptyString16()};	510 std::set<base::string16> variants = {base::EmptyString16()};

150	511

151 // For each sub-name, add a variant of all the already existing variants that	512 // For each sub-name, add a variant of all the already existing variants that

152 // appends this sub-name and one that appends the initial of this sub-name.	513 // appends this sub-name and one that appends the initial of this sub-name.

153 // Duplicates will be discarded when they're added to the variants set.	514 // Duplicates will be discarded when they're added to the variants set.

154 for (const base::string16& sub_name : sub_names) {	515 for (const base::string16& sub_name : sub_names) {

155 if (sub_name.empty()) continue;	516 if (sub_name.empty())

	517 continue;

156 std::vector<base::string16> new_variants;	518 std::vector<base::string16> new_variants;

157 for (const base::string16& variant : variants) {	519 for (const base::string16& variant : variants) {

158 new_variants.push_back(base::CollapseWhitespace(	520 new_variants.push_back(base::CollapseWhitespace(

159 base::JoinString({variant, sub_name}, kSpace), true));	521 base::JoinString({variant, sub_name}, kSpace), true));

160 new_variants.push_back(base::CollapseWhitespace(	522 new_variants.push_back(base::CollapseWhitespace(

161 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true));	523 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true));

162 }	524 }

163 variants.insert(new_variants.begin(), new_variants.end());	525 variants.insert(new_variants.begin(), new_variants.end());

164 }	526 }

165	527

166 // As a common case, also add the variant that just concatenates all of the	528 // As a common case, also add the variant that just concatenates all of the

167 // initials.	529 // initials.

168 base::string16 initials;	530 base::string16 initials;

169 for (const base::string16& sub_name : sub_names) {	531 for (const base::string16& sub_name : sub_names) {

170 if (sub_name.empty()) continue;	532 if (sub_name.empty())

	533 continue;

171 initials.push_back(sub_name[0]);	534 initials.push_back(sub_name[0]);

172 }	535 }

173 variants.insert(initials);	536 variants.insert(initials);

174	537

175 // And, we're done.	538 // And, we're done.

176 return variants;	539 return variants;

177 }	540 }

178	541

179 bool AutofillProfileComparator::IsNameVariantOf(	542 bool AutofillProfileComparator::IsNameVariantOf(

180 const base::string16& full_name_1,	543 const base::string16& full_name_1,

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
246 }	609 }

247	610

248 bool AutofillProfileComparator::HaveMergeableCompanyNames(	611 bool AutofillProfileComparator::HaveMergeableCompanyNames(

249 const AutofillProfile& p1,	612 const AutofillProfile& p1,

250 const AutofillProfile& p2) const {	613 const AutofillProfile& p2) const {

251 const base::string16& company_name_1 = NormalizeForComparison(	614 const base::string16& company_name_1 = NormalizeForComparison(

252 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_));	615 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_));

253 const base::string16& company_name_2 = NormalizeForComparison(	616 const base::string16& company_name_2 = NormalizeForComparison(

254 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));	617 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));

255 return company_name_1.empty() \|\| company_name_2.empty() \|\|	618 return company_name_1.empty() \|\| company_name_2.empty() \|\|

256 HaveSameTokens(company_name_1, company_name_2);	619 CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS;

257 }	620 }

258	621

259 bool AutofillProfileComparator::HaveMergeablePhoneNumbers(	622 bool AutofillProfileComparator::HaveMergeablePhoneNumbers(

260 const AutofillProfile& p1,	623 const AutofillProfile& p1,

261 const AutofillProfile& p2) const {	624 const AutofillProfile& p2) const {

262 // We work with the raw phone numbers to avoid losing any helpful information	625 // We work with the raw phone numbers to avoid losing any helpful information

263 // as we parse.	626 // as we parse.

264 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);	627 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);

265 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);	628 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);

266	629

267 // Are the two phone numbers trivially mergeable?	630 // Are the two phone numbers trivially mergeable?

268 if (raw_phone_1.empty() \|\| raw_phone_2.empty() \|\|	631 if (raw_phone_1.empty() \|\| raw_phone_2.empty() \|\|

269 raw_phone_1 == raw_phone_2) {	632 raw_phone_1 == raw_phone_2) {

270 return true;	633 return true;

271 }	634 }

272	635

273 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support	636 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support

274 // SHORT_NSN_MATCH and just call that instead of accessing the underlying	637 // SHORT_NSN_MATCH and just call that instead of accessing the underlying

275 // utility library directly?	638 // utility library directly?

276	639

277 // The phone number util library needs the numbers in utf8.	640 // The phone number util library needs the numbers in utf8.

278 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1);	641 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1);

279 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2);	642 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2);

280	643

281 // Parse and compare the phone numbers.	644 // Parse and compare the phone numbers.

282 using ::i18n::phonenumbers::PhoneNumberUtil;

283 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();	645 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();

284 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) {	646 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) {

285 case PhoneNumberUtil::INVALID_NUMBER:	647 case PhoneNumberUtil::INVALID_NUMBER:

286 case PhoneNumberUtil::NO_MATCH:	648 case PhoneNumberUtil::NO_MATCH:

287 return false;	649 return false;

288 case PhoneNumberUtil::SHORT_NSN_MATCH:	650 case PhoneNumberUtil::SHORT_NSN_MATCH:

289 case PhoneNumberUtil::NSN_MATCH:	651 case PhoneNumberUtil::NSN_MATCH:

290 case PhoneNumberUtil::EXACT_MATCH:	652 case PhoneNumberUtil::EXACT_MATCH:

291 return true;	653 return true;

292 }	654 }

293	655

294 NOTREACHED();	656 NOTREACHED();

295 return false;	657 return false;

296 }	658 }

297	659

298 bool AutofillProfileComparator::HaveMergeableAddresses(	660 bool AutofillProfileComparator::HaveMergeableAddresses(

299 const AutofillProfile& p1,	661 const AutofillProfile& p1,

300 const AutofillProfile& p2) const {	662 const AutofillProfile& p2) const {

301 // If the addresses are not in the same country, then they're not the same. If	663 // If the addresses are not in the same country, then they're not the same. If

302 // one of the address countries is unknown/invalid the comparison continues.	664 // one of the address countries is unknown/invalid the comparison continues.

303 const base::string16& country1 = p1.GetInfo(	665 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);

304 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);	666 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);

305 const base::string16& country2 = p2.GetInfo(	667 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_);

306 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);

307 if (!country1.empty() && !country2.empty() &&	668 if (!country1.empty() && !country2.empty() &&

308 !case_insensitive_compare_.StringsEqual(country1, country2)) {	669 !case_insensitive_compare_.StringsEqual(country1, country2)) {

309 return false;	670 return false;

310 }	671 }

311	672

312 // TODO(rogerm): Lookup the normalization rules for the (common) country of	673 // TODO(rogerm): Lookup the normalization rules for the (common) country of

313 // the address. The rules should be applied post NormalizeForComparison to	674 // the address. The rules should be applied post NormalizeForComparison to

314 // the state, city, and address bag of words comparisons.	675 // the state, city, and address bag of words comparisons.

315	676

316 // Zip	677 // Zip

317 // ----	678 // ----

318 // If the addresses are definitely not in the same zip/area code then we're	679 // If the addresses are definitely not in the same zip/area code then we're

319 // done. Otherwise, the comparison continues.	680 // done. Otherwise, the comparison continues.

	681 const AutofillType kZipCode(ADDRESS_HOME_ZIP);

320 const base::string16& zip1 = NormalizeForComparison(	682 const base::string16& zip1 = NormalizeForComparison(

321 p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),	683 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

322 DISCARD_WHITESPACE);

323 const base::string16& zip2 = NormalizeForComparison(	684 const base::string16& zip2 = NormalizeForComparison(

324 p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_),	685 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

325 DISCARD_WHITESPACE);

326 if (!zip1.empty() && !zip2.empty() &&	686 if (!zip1.empty() && !zip2.empty() &&

327 zip1.find(zip2) == base::string16::npos &&	687 zip1.find(zip2) == base::string16::npos &&

328 zip2.find(zip1) == base::string16::npos) {	688 zip2.find(zip1) == base::string16::npos) {

329 return false;	689 return false;

330 }	690 }

331	691

332 // State	692 // State

333 // ------	693 // ------

334 // Heuristic: If the match is between non-empty zip codes then we can infer	694 // Heuristic: States are mergeable if one is a (possibly empty) bag of words

	695 // subset of the other.

	696 //

	697 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

335 // that the two state strings are intended to have the same meaning. This	698 // that the two state strings are intended to have the same meaning. This

336 // handles the cases where we have invalid or poorly formed data in one of the	699 // handles the cases where we have invalid or poorly formed data in one of the

337 // state values (like "Select one", or "CA - California"). Otherwise, we	700 // state values (like "Select one", or "CA - California").

338 // actually have to check if the states map to the the same set of tokens.	701 const AutofillType kState(ADDRESS_HOME_STATE);

339 const base::string16& state1 = NormalizeForComparison(	702 const base::string16& state1 =

340 p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));	703 NormalizeForComparison(p1.GetInfo(kState, app_locale_));

341 const base::string16& state2 = NormalizeForComparison(	704 const base::string16& state2 =

342 p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_));	705 NormalizeForComparison(p2.GetInfo(kState, app_locale_));

343 if ((zip1.empty() \|\| zip2.empty()) && !HaveSameTokens(state1, state2)) {	706 if (!IsMatchingState(GetNonEmptyOf(p1, p2, kCountryCode), state1, state2) &&

	707 CompareTokens(state1, state2) == DIFFERENT_TOKENS) {

344 return false;	708 return false;

345 }	709 }

346	710

347 // City	711 // City

348 // ------	712 // ------

349 // Heuristic: If the match is between non-empty zip codes then we can infer	713 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words

	714 // subset of the other.

	715 //

	716 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

350 // that the two city strings are intended to have the same meaning. This	717 // that the two city strings are intended to have the same meaning. This

351 // handles the cases where we have a city vs one of its suburbs. Otherwise, we	718 // handles the cases where we have a city vs one of its suburbs.

352 // actually have to check if the cities map to the the same set of tokens.

353 const base::string16& city1 = NormalizeForComparison(	719 const base::string16& city1 = NormalizeForComparison(

354 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	720 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

355 const base::string16& city2 = NormalizeForComparison(	721 const base::string16& city2 = NormalizeForComparison(

356 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	722 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));

357 if ((zip1.empty() \|\| zip2.empty()) && !HaveSameTokens(city1, city2)) {	723 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {

358 return false;	724 return false;

359 }	725 }

360	726

361 // Address	727 // Address

362 // --------	728 // --------

363 // Heuristic: Use bag of words comparison on the post-normalized addresses.	729 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag

	730 // of words subset of the other.

364 const base::string16& address1 = NormalizeForComparison(	731 const base::string16& address1 = NormalizeForComparison(

365 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	732 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

366 const base::string16& address2 = NormalizeForComparison(	733 const base::string16& address2 = NormalizeForComparison(

367 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	734 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));

368 if (!HaveSameTokens(address1, address2)) {	735 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {

369 return false;	736 return false;

370 }	737 }

371	738

372 return true;	739 return true;

373 }	740 }

374	741

	742 bool AutofillProfileComparator::IsMatchingState(

	743 const base::string16& country_code,

	744 const base::string16& state1,

	745 const base::string16& state2) const {

	746 if (state1 == state2)

	747 return true;

	748

	749 if (country_code != kUS)

	750 return false;

	751

	752 // TODO(rogerm): Generalize this to all locales using string equivalence rules.

	753 base::string16 name, abbreviation;

	754 autofill::state_names::GetNameAndAbbreviation(state1, &name, &abbreviation);

	755 if (abbreviation.empty()) {

	756 // state1 wasn't recognized. There's no need to compare it to state2

	757 return false;

	758 }

	759

	760 return state2 == name \|\| state2 == abbreviation;

	761 }

	762

375 } // namespace autofill	763 } // namespace autofill

OLD	NEW