Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(947)

Side by Side Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2088443002: Expand autofill profile merge logic. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Tommy's comments Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/core/browser/autofill_profile_comparator.h" 5 #include "components/autofill/core/browser/autofill_profile_comparator.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/i18n/case_conversion.h"
10 #include "base/i18n/char_iterator.h" 11 #include "base/i18n/char_iterator.h"
12 #include "base/strings/string_piece.h"
11 #include "base/strings/string_split.h" 13 #include "base/strings/string_split.h"
12 #include "base/strings/string_util.h" 14 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversion_utils.h" 15 #include "base/strings/utf_string_conversion_utils.h"
14 #include "base/strings/utf_string_conversions.h" 16 #include "base/strings/utf_string_conversions.h"
17 #include "components/autofill/core/browser/autofill_country.h"
15 #include "components/autofill/core/browser/autofill_data_util.h" 18 #include "components/autofill/core/browser/autofill_data_util.h"
19 #include "components/autofill/core/browser/state_names.h"
16 #include "third_party/libphonenumber/phonenumber_api.h" 20 #include "third_party/libphonenumber/phonenumber_api.h"
17 21
22 using i18n::phonenumbers::PhoneNumberUtil;
23 using base::UTF16ToUTF8;
24 using base::UTF8ToUTF16;
25
18 namespace autofill { 26 namespace autofill {
19 namespace { 27 namespace {
20 28
21 const base::char16 kSpace[] = {L' ', L'\0'}; 29 const base::char16 kSpace[] = {L' ', L'\0'};
30 const base::char16 kUS[] = {L'U', L'S', L'\0'};
31
32 bool ContainsNewline(base::StringPiece16 text) {
33 return text.find('\n') != base::StringPiece16::npos;
34 }
35
36 std::ostream& operator<<(std::ostream& os,
37 const ::i18n::phonenumbers::PhoneNumber& n) {
38 os << "country_code: " << n.country_code() << " "
39 << "national_number: " << n.national_number();
40 if (n.has_extension())
41 os << " extension: \"" << n.extension() << "\"";
42 if (n.has_italian_leading_zero())
43 os << " italian_leading_zero: " << n.italian_leading_zero();
44 if (n.has_number_of_leading_zeros())
45 os << " number_of_leading_zeros: " << n.number_of_leading_zeros();
46 if (n.has_raw_input())
47 os << " raw_input: \"" << n.raw_input() << "\"";
48 return os;
49 }
22 50
23 } // namespace 51 } // namespace
24 52
25 AutofillProfileComparator::AutofillProfileComparator( 53 AutofillProfileComparator::AutofillProfileComparator(
26 const base::StringPiece& app_locale) 54 const base::StringPiece& app_locale)
27 : app_locale_(app_locale.data(), app_locale.size()) { 55 : app_locale_(app_locale.data(), app_locale.size()) {
28 // Use ICU transliteration to remove diacritics and fold case. 56 // Use ICU transliteration to remove diacritics and fold case.
29 // See http://userguide.icu-project.org/transforms/general 57 // See http://userguide.icu-project.org/transforms/general
30 UErrorCode status = U_ZERO_ERROR; 58 UErrorCode status = U_ZERO_ERROR;
31 std::unique_ptr<icu::Transliterator> transliterator( 59 std::unique_ptr<icu::Transliterator> transliterator(
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1, 134 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,
107 const AutofillProfile& p2) const { 135 const AutofillProfile& p2) const {
108 // Sorted in order of relative expense of the tests to fail early and cheaply 136 // Sorted in order of relative expense of the tests to fail early and cheaply
109 // if possible. 137 // if possible.
110 return HaveMergeableEmailAddresses(p1, p2) && 138 return HaveMergeableEmailAddresses(p1, p2) &&
111 HaveMergeableCompanyNames(p1, p2) && 139 HaveMergeableCompanyNames(p1, p2) &&
112 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) && 140 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) &&
113 HaveMergeableAddresses(p1, p2); 141 HaveMergeableAddresses(p1, p2);
114 } 142 }
115 143
144 bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1,
145 const AutofillProfile& p2,
146 NameInfo* name_info) const {
147 DCHECK(HaveMergeableNames(p1, p2));
148
149 const AutofillType kFullName(NAME_FULL);
150 const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_);
151 const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_);
152 const base::string16& normalized_full_name_1 =
153 NormalizeForComparison(full_name_1);
154 const base::string16& normalized_full_name_2 =
155 NormalizeForComparison(full_name_2);
156
157 const base::string16* best_name = nullptr;
158 if (normalized_full_name_1.empty()) {
159 // p1 has no name, so use the name from p2.
160 best_name = &full_name_2;
161 } else if (normalized_full_name_2.empty()) {
162 // p2 has no name, so use the name from p1.
163 best_name = &full_name_1;
164 } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) {
165 // full_name_2 is a variant of full_name_1.
166 best_name = &full_name_1;
167 } else {
168 // If the assertion that p1 and p2 have mergeable names is true, then
169 // full_name_1 must be a name variant of full_name_2.
170 best_name = &full_name_2;
171 }
172
173 name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_);
174 return true;
175 }
176
177 bool AutofillProfileComparator::MergeEmailAddresses(
178 const AutofillProfile& p1,
179 const AutofillProfile& p2,
180 EmailInfo* email_info) const {
181 DCHECK(HaveMergeableEmailAddresses(p1, p2));
182
183 const AutofillType kEmailAddress(EMAIL_ADDRESS);
184 const base::string16& e1 = p1.GetInfo(kEmailAddress, app_locale_);
185 const base::string16& e2 = p2.GetInfo(kEmailAddress, app_locale_);
186 const base::string16* best = nullptr;
187
188 if (e1.empty()) {
189 best = &e2;
190 } else if (e2.empty()) {
191 best = &e1;
192 } else {
193 best = p2.use_date() > p1.use_date() ? &e2 : &e1;
194 }
195
196 email_info->SetInfo(kEmailAddress, *best, app_locale_);
197 return true;
198 }
199
200 bool AutofillProfileComparator::MergeCompanyNames(
201 const AutofillProfile& p1,
202 const AutofillProfile& p2,
203 CompanyInfo* company_info) const {
204 const AutofillType kCompanyName(COMPANY_NAME);
205 const base::string16& c1 = p1.GetInfo(kCompanyName, app_locale_);
206 const base::string16& c2 = p2.GetInfo(kCompanyName, app_locale_);
207 const base::string16* best = nullptr;
208
209 DCHECK(HaveMergeableCompanyNames(p1, p2))
210 << "Company names are not mergeable: '" << c1 << "' vs '" << c2 << "'";
211
212 CompareTokensResult result =
213 CompareTokens(NormalizeForComparison(c1), NormalizeForComparison(c2));
214 switch (result) {
215 case DIFFERENT_TOKENS:
216 default:
217 NOTREACHED();
218 return false;
219 case S1_CONTAINS_S2:
220 best = &c1;
221 break;
222 case S2_CONTAINS_S1:
223 best = &c2;
224 break;
225 case SAME_TOKENS:
226 best = p2.use_date() > p1.use_date() ? &c2 : &c1;
227 break;
228 }
229
230 company_info->SetInfo(kCompanyName, *best, app_locale_);
231 return true;
232 }
233
234 bool AutofillProfileComparator::MergePhoneNumbers(
235 const AutofillProfile& p1,
236 const AutofillProfile& p2,
237 PhoneNumber* phone_number) const {
238 const ServerFieldType kWholePhoneNumber = PHONE_HOME_WHOLE_NUMBER;
239 const base::string16& s1 = p1.GetRawInfo(kWholePhoneNumber);
240 const base::string16& s2 = p2.GetRawInfo(kWholePhoneNumber);
241
242 DCHECK(HaveMergeablePhoneNumbers(p1, p2))
243 << "Phone numbers are not mergeable: '" << s1 << "' vs '" << s2 << "'";
244
245 if (s1.empty()) {
246 phone_number->SetRawInfo(kWholePhoneNumber, s2);
247 return true;
248 }
249
250 if (s2.empty() || s1 == s2) {
251 phone_number->SetRawInfo(kWholePhoneNumber, s1);
252 return true;
253 }
254
255 // Figure out a country code hint.
256 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);
257 std::string region = UTF16ToUTF8(GetNonEmptyOf(p1, p2, kCountryCode));
258 if (region.empty())
259 region = AutofillCountry::CountryCodeForLocale(app_locale_);
260
261 // Parse the phone numbers.
262 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();
263
264 ::i18n::phonenumbers::PhoneNumber n1;
265 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s1), region, &n1) !=
266 PhoneNumberUtil::NO_PARSING_ERROR) {
267 return false;
268 }
269
270 ::i18n::phonenumbers::PhoneNumber n2;
271 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s2), region, &n2) !=
272 PhoneNumberUtil::NO_PARSING_ERROR) {
273 return false;
274 }
275
276 ::i18n::phonenumbers::PhoneNumber merged_number;
277 DCHECK_EQ(n1.country_code(), n2.country_code());
278 merged_number.set_country_code(n1.country_code());
279 merged_number.set_national_number(
280 std::max(n1.national_number(), n2.national_number()));
281 if (n1.has_extension() && !n1.extension().empty()) {
282 merged_number.set_extension(n1.extension());
283 } else if (n2.has_extension() && !n2.extension().empty()) {
284 merged_number.set_extension(n2.extension());
285 }
286 if (n1.has_italian_leading_zero() || n2.has_italian_leading_zero()) {
287 merged_number.set_italian_leading_zero(n1.italian_leading_zero() ||
288 n2.italian_leading_zero());
289 }
290 if (n1.has_number_of_leading_zeros() || n2.has_number_of_leading_zeros()) {
291 merged_number.set_number_of_leading_zeros(
292 std::max(n1.number_of_leading_zeros(), n2.number_of_leading_zeros()));
293 }
294
295 PhoneNumberUtil::PhoneNumberFormat format =
296 region.empty() ? PhoneNumberUtil::NATIONAL
297 : PhoneNumberUtil::INTERNATIONAL;
298
299 std::string new_number;
300 phone_util->Format(merged_number, format, &new_number);
301
302 VLOG(1) << "n1 = {" << n1 << "}";
303 VLOG(1) << "n2 = {" << n2 << "}";
304 VLOG(1) << "merged_number = {" << merged_number << "}";
305 VLOG(1) << "new_number = \"" << new_number << "\"";
306
307 // Check if it's a North American number that's missing the area code.
308 // Libphonenumber doesn't know how to format short numbers; it will still
309 // include the country code prefix.
310 if (merged_number.country_code() == 1 &&
311 merged_number.national_number() <= 9999999 &&
312 new_number.find("+1") == 0) {
313 size_t offset = 2; // The char just after "+1".
314 while (offset < new_number.size() &&
315 base::IsAsciiWhitespace(new_number[offset])) {
316 ++offset;
317 }
318 new_number = new_number.substr(offset);
319 }
320
321 phone_number->SetRawInfo(kWholePhoneNumber, UTF8ToUTF16(new_number));
322
323 return true;
324 }
325
326 bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1,
327 const AutofillProfile& p2,
328 Address* address) const {
329 DCHECK(HaveMergeableAddresses(p1, p2));
330
331 // One of the countries is empty or they are the same modulo case, so we just
332 // have to find the non-empty one, if any.
333 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);
334 const base::string16& country_code =
335 base::i18n::ToUpper(GetNonEmptyOf(p1, p2, kCountryCode));
336 address->SetInfo(kCountryCode, country_code, app_locale_);
337
338 // One of the zip codes is empty, they are the same, or one is a substring
339 // of the other. We prefer the most recently used zip code.
340 const AutofillType kZipCode(ADDRESS_HOME_ZIP);
341 const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_);
342 const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_);
343 if (zip1.empty()) {
344 address->SetInfo(kZipCode, zip2, app_locale_);
345 } else if (zip2.empty()) {
346 address->SetInfo(kZipCode, zip1, app_locale_);
347 } else {
348 address->SetInfo(kZipCode, (p2.use_date() > p1.use_date() ? zip2 : zip1),
349 app_locale_);
350 }
351
352 // One of the states is empty or one of the states has a subset of tokens from
353 // the other. Pick the non-empty state that is shorter. This is usually the
354 // abbreviated one.
355 const AutofillType kState(ADDRESS_HOME_STATE);
356 const base::string16& state1 = p1.GetInfo(kState, app_locale_);
357 const base::string16& state2 = p2.GetInfo(kState, app_locale_);
358 if (state1.empty()) {
359 address->SetInfo(kState, state2, app_locale_);
360 } else if (state2.empty()) {
361 address->SetInfo(kState, state1, app_locale_);
362 } else {
363 address->SetInfo(kState, (state2.size() < state1.size() ? state2 : state1),
364 app_locale_);
365 }
366
367 // One of the cities is empty or one of the cities has a subset of tokens from
368 // the other. Pick the city name with more tokens; this is usually the most
369 // explicit one.
370 const AutofillType kCity(ADDRESS_HOME_CITY);
371 const base::string16& city1 = p1.GetInfo(kCity, app_locale_);
372 const base::string16& city2 = p2.GetInfo(kCity, app_locale_);
373 if (city1.empty()) {
374 address->SetInfo(kCity, city2, app_locale_);
375 } else if (city2.empty()) {
376 address->SetInfo(kCity, city1, app_locale_);
377 } else {
378 // Prefer the one with more tokens.
379 CompareTokensResult result = CompareTokens(NormalizeForComparison(city1),
380 NormalizeForComparison(city2));
381 switch (result) {
382 case SAME_TOKENS:
383 // They have the same set of unique tokens. Let's pick the more recently
384 // used one.
385 address->SetInfo(kCity, (p2.use_date() > p1.use_date() ? city2 : city1),
386 app_locale_);
387 break;
388 case S1_CONTAINS_S2:
389 // city1 has more unique tokens than city2.
390 address->SetInfo(kCity, city1, app_locale_);
391 break;
392 case S2_CONTAINS_S1:
393 // city2 has more unique tokens than city1.
394 address->SetInfo(kCity, city2, app_locale_);
395 break;
396 case DIFFERENT_TOKENS:
397 default:
398 // The addresses aren't mergeable and we shouldn't be doing any of
399 // this.
400 NOTREACHED();
401 return false;
402 }
403 }
404
405 // One of the addresses is empty or one of the addresses has a subset of
406 // tokens from the other. Prefer the more verbosely expressed one.
407 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS);
408 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_);
409 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_);
410 // If one of the addresses is empty then use the other.
411 if (address1.empty()) {
412 address->SetInfo(kStreetAddress, address2, app_locale_);
413 } else if (address2.empty()) {
414 address->SetInfo(kStreetAddress, address1, app_locale_);
415 } else {
416 // Prefer the multi-line address if one is multi-line and the other isn't.
417 bool address1_multiline = ContainsNewline(address1);
418 bool address2_multiline = ContainsNewline(address2);
419 if (address1_multiline && !address2_multiline) {
420 address->SetInfo(kStreetAddress, address1, app_locale_);
421 } else if (address2_multiline && !address1_multiline) {
422 address->SetInfo(kStreetAddress, address2, app_locale_);
423 } else {
424 // Prefer the one with more tokens if they're both single-line or both
425 // multi-line addresses.
426 CompareTokensResult result = CompareTokens(
427 NormalizeForComparison(address1), NormalizeForComparison(address2));
428 switch (result) {
429 case SAME_TOKENS:
430 // They have the same set of unique tokens. Let's pick the more
431 // recently used one.
432 address->SetInfo(
433 kStreetAddress,
434 (p2.use_date() > p1.use_date() ? address2 : address1),
435 app_locale_);
436 break;
437 case S1_CONTAINS_S2:
438 // address1 has more unique tokens than address2.
439 address->SetInfo(kStreetAddress, address1, app_locale_);
440 break;
441 case S2_CONTAINS_S1:
442 // address2 has more unique tokens than address1.
443 address->SetInfo(kStreetAddress, address2, app_locale_);
444 break;
445 case DIFFERENT_TOKENS:
446 default:
447 // The addresses aren't mergeable and we shouldn't be doing any of
448 // this.
449 NOTREACHED();
450 return false;
451 }
452 }
453 }
454 return true;
455 }
456
116 // static 457 // static
117 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( 458 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens(
118 base::StringPiece16 s) { 459 base::StringPiece16 s) {
119 std::vector<base::StringPiece16> tokens = base::SplitStringPiece( 460 std::vector<base::StringPiece16> tokens = base::SplitStringPiece(
120 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); 461 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
121 return std::set<base::StringPiece16>(tokens.begin(), tokens.end()); 462 return std::set<base::StringPiece16>(tokens.begin(), tokens.end());
122 } 463 }
123 464
124 // static 465 // static
125 bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1, 466 AutofillProfileComparator::CompareTokensResult
126 base::StringPiece16 s2) { 467 AutofillProfileComparator::CompareTokens(base::StringPiece16 s1,
468 base::StringPiece16 s2) {
469 // Note: std::includes() expects the items in each range to be in sorted order,
470 // hence the use of std::set<> instead of std::unordered_set<>.
127 std::set<base::StringPiece16> t1 = UniqueTokens(s1); 471 std::set<base::StringPiece16> t1 = UniqueTokens(s1);
128 std::set<base::StringPiece16> t2 = UniqueTokens(s2); 472 std::set<base::StringPiece16> t2 = UniqueTokens(s2);
129 473
130 // Note: std::include() expects the items in each range to be in sorted order, 474 // Does s1 contain all of the tokens in s2? As a special case, return SAME_TOKENS if
131 // hence the use of std::set<> instead of std::unordered_set<>. 475 // the two sets are exactly the same.
132 return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) || 476 if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()))
133 std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()); 477 return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2;
478
479 // Does s2 contain all of the tokens in s1?
480 if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()))
481 return S2_CONTAINS_S1;
482
483 // Neither string contains all of the tokens from the other.
484 return DIFFERENT_TOKENS;
485 }
486
487 base::string16 AutofillProfileComparator::GetNonEmptyOf(
488 const AutofillProfile& p1,
489 const AutofillProfile& p2,
490 AutofillType t) const {
491 const base::string16& s1 = p1.GetInfo(t, app_locale_);
492 if (!s1.empty())
493 return s1;
494 return p2.GetInfo(t, app_locale_);
134 } 495 }
135 496
136 // static 497 // static
137 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants( 498 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants(
138 const base::string16& name_part) { 499 const base::string16& name_part) {
139 const size_t kMaxSupportedSubNames = 8; 500 const size_t kMaxSupportedSubNames = 8;
140 501
141 std::vector<base::string16> sub_names = base::SplitString( 502 std::vector<base::string16> sub_names = base::SplitString(
142 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); 503 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
143 504
144 // Limit the number of sub-names we support (to constrain memory usage); 505 // Limit the number of sub-names we support (to constrain memory usage);
145 if (sub_names.size() > kMaxSupportedSubNames) 506 if (sub_names.size() > kMaxSupportedSubNames)
146 return {name_part}; 507 return {name_part};
147 508
148 // Start with the empty string as a variant. 509 // Start with the empty string as a variant.
149 std::set<base::string16> variants = {base::EmptyString16()}; 510 std::set<base::string16> variants = {base::EmptyString16()};
150 511
151 // For each sub-name, add a variant of all the already existing variants that 512 // For each sub-name, add a variant of all the already existing variants that
152 // appends this sub-name and one that appends the initial of this sub-name. 513 // appends this sub-name and one that appends the initial of this sub-name.
153 // Duplicates will be discarded when they're added to the variants set. 514 // Duplicates will be discarded when they're added to the variants set.
154 for (const base::string16& sub_name : sub_names) { 515 for (const base::string16& sub_name : sub_names) {
155 if (sub_name.empty()) continue; 516 if (sub_name.empty())
517 continue;
156 std::vector<base::string16> new_variants; 518 std::vector<base::string16> new_variants;
157 for (const base::string16& variant : variants) { 519 for (const base::string16& variant : variants) {
158 new_variants.push_back(base::CollapseWhitespace( 520 new_variants.push_back(base::CollapseWhitespace(
159 base::JoinString({variant, sub_name}, kSpace), true)); 521 base::JoinString({variant, sub_name}, kSpace), true));
160 new_variants.push_back(base::CollapseWhitespace( 522 new_variants.push_back(base::CollapseWhitespace(
161 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true)); 523 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true));
162 } 524 }
163 variants.insert(new_variants.begin(), new_variants.end()); 525 variants.insert(new_variants.begin(), new_variants.end());
164 } 526 }
165 527
166 // As a common case, also add the variant that just concatenates all of the 528 // As a common case, also add the variant that just concatenates all of the
167 // initials. 529 // initials.
168 base::string16 initials; 530 base::string16 initials;
169 for (const base::string16& sub_name : sub_names) { 531 for (const base::string16& sub_name : sub_names) {
170 if (sub_name.empty()) continue; 532 if (sub_name.empty())
533 continue;
171 initials.push_back(sub_name[0]); 534 initials.push_back(sub_name[0]);
172 } 535 }
173 variants.insert(initials); 536 variants.insert(initials);
174 537
175 // And, we're done. 538 // And, we're done.
176 return variants; 539 return variants;
177 } 540 }
178 541
179 bool AutofillProfileComparator::IsNameVariantOf( 542 bool AutofillProfileComparator::IsNameVariantOf(
180 const base::string16& full_name_1, 543 const base::string16& full_name_1,
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
246 } 609 }
247 610
248 bool AutofillProfileComparator::HaveMergeableCompanyNames( 611 bool AutofillProfileComparator::HaveMergeableCompanyNames(
249 const AutofillProfile& p1, 612 const AutofillProfile& p1,
250 const AutofillProfile& p2) const { 613 const AutofillProfile& p2) const {
251 const base::string16& company_name_1 = NormalizeForComparison( 614 const base::string16& company_name_1 = NormalizeForComparison(
252 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); 615 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_));
253 const base::string16& company_name_2 = NormalizeForComparison( 616 const base::string16& company_name_2 = NormalizeForComparison(
254 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); 617 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_));
255 return company_name_1.empty() || company_name_2.empty() || 618 return company_name_1.empty() || company_name_2.empty() ||
256 HaveSameTokens(company_name_1, company_name_2); 619 CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS;
257 } 620 }
258 621
259 bool AutofillProfileComparator::HaveMergeablePhoneNumbers( 622 bool AutofillProfileComparator::HaveMergeablePhoneNumbers(
260 const AutofillProfile& p1, 623 const AutofillProfile& p1,
261 const AutofillProfile& p2) const { 624 const AutofillProfile& p2) const {
262 // We work with the raw phone numbers to avoid losing any helpful information 625 // We work with the raw phone numbers to avoid losing any helpful information
263 // as we parse. 626 // as we parse.
264 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); 627 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);
265 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); 628 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER);
266 629
267 // Are the two phone numbers trivially mergeable? 630 // Are the two phone numbers trivially mergeable?
268 if (raw_phone_1.empty() || raw_phone_2.empty() || 631 if (raw_phone_1.empty() || raw_phone_2.empty() ||
269 raw_phone_1 == raw_phone_2) { 632 raw_phone_1 == raw_phone_2) {
270 return true; 633 return true;
271 } 634 }
272 635
273 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support 636 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support
274 // SHORT_NSN_MATCH and just call that instead of accessing the underlying 637 // SHORT_NSN_MATCH and just call that instead of accessing the underlying
275 // utility library directly? 638 // utility library directly?
276 639
277 // The phone number util library needs the numbers in utf8. 640 // The phone number util library needs the numbers in utf8.
278 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1); 641 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1);
279 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2); 642 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2);
280 643
281 // Parse and compare the phone numbers. 644 // Parse and compare the phone numbers.
282 using ::i18n::phonenumbers::PhoneNumberUtil;
283 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance(); 645 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance();
284 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) { 646 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) {
285 case PhoneNumberUtil::INVALID_NUMBER: 647 case PhoneNumberUtil::INVALID_NUMBER:
286 case PhoneNumberUtil::NO_MATCH: 648 case PhoneNumberUtil::NO_MATCH:
287 return false; 649 return false;
288 case PhoneNumberUtil::SHORT_NSN_MATCH: 650 case PhoneNumberUtil::SHORT_NSN_MATCH:
289 case PhoneNumberUtil::NSN_MATCH: 651 case PhoneNumberUtil::NSN_MATCH:
290 case PhoneNumberUtil::EXACT_MATCH: 652 case PhoneNumberUtil::EXACT_MATCH:
291 return true; 653 return true;
292 } 654 }
293 655
294 NOTREACHED(); 656 NOTREACHED();
295 return false; 657 return false;
296 } 658 }
297 659
298 bool AutofillProfileComparator::HaveMergeableAddresses( 660 bool AutofillProfileComparator::HaveMergeableAddresses(
299 const AutofillProfile& p1, 661 const AutofillProfile& p1,
300 const AutofillProfile& p2) const { 662 const AutofillProfile& p2) const {
301 // If the addresses are not in the same country, then they're not the same. If 663 // If the addresses are not in the same country, then they're not the same. If
302 // one of the address countries is unknown/invalid the comparison continues. 664 // one of the address countries is unknown/invalid the comparison continues.
303 const base::string16& country1 = p1.GetInfo( 665 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);
304 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_); 666 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);
305 const base::string16& country2 = p2.GetInfo( 667 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_);
306 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_);
307 if (!country1.empty() && !country2.empty() && 668 if (!country1.empty() && !country2.empty() &&
308 !case_insensitive_compare_.StringsEqual(country1, country2)) { 669 !case_insensitive_compare_.StringsEqual(country1, country2)) {
309 return false; 670 return false;
310 } 671 }
311 672
312 // TODO(rogerm): Lookup the normalization rules for the (common) country of 673 // TODO(rogerm): Lookup the normalization rules for the (common) country of
313 // the address. The rules should be applied post NormalizeForComparison to 674 // the address. The rules should be applied post NormalizeForComparison to
314 // the state, city, and address bag of words comparisons. 675 // the state, city, and address bag of words comparisons.
315 676
316 // Zip 677 // Zip
317 // ---- 678 // ----
318 // If the addresses are definitely not in the same zip/area code then we're 679 // If the addresses are definitely not in the same zip/area code then we're
319 // done. Otherwise, the comparison continues. 680 // done. Otherwise, the comparison continues.
681 const AutofillType kZipCode(ADDRESS_HOME_ZIP);
320 const base::string16& zip1 = NormalizeForComparison( 682 const base::string16& zip1 = NormalizeForComparison(
321 p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), 683 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
322 DISCARD_WHITESPACE);
323 const base::string16& zip2 = NormalizeForComparison( 684 const base::string16& zip2 = NormalizeForComparison(
324 p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), 685 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
325 DISCARD_WHITESPACE);
326 if (!zip1.empty() && !zip2.empty() && 686 if (!zip1.empty() && !zip2.empty() &&
327 zip1.find(zip2) == base::string16::npos && 687 zip1.find(zip2) == base::string16::npos &&
328 zip2.find(zip1) == base::string16::npos) { 688 zip2.find(zip1) == base::string16::npos) {
329 return false; 689 return false;
330 } 690 }
331 691
332 // State 692 // State
333 // ------ 693 // ------
334 // Heuristic: If the match is between non-empty zip codes then we can infer 694 // Heuristic: States are mergeable if one is a (possibly empty) bag of words
695 // subset of the other.
696 //
697 // TODO(rogerm): If the match is between non-empty zip codes then we can infer
335 // that the two state strings are intended to have the same meaning. This 698 // that the two state strings are intended to have the same meaning. This
336 // handles the cases where we have invalid or poorly formed data in one of the 699 // handles the cases where we have invalid or poorly formed data in one of the
337 // state values (like "Select one", or "CA - California"). Otherwise, we 700 // state values (like "Select one", or "CA - California").
338 // actually have to check if the states map to the the same set of tokens. 701 const AutofillType kState(ADDRESS_HOME_STATE);
339 const base::string16& state1 = NormalizeForComparison( 702 const base::string16& state1 =
340 p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); 703 NormalizeForComparison(p1.GetInfo(kState, app_locale_));
341 const base::string16& state2 = NormalizeForComparison( 704 const base::string16& state2 =
342 p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); 705 NormalizeForComparison(p2.GetInfo(kState, app_locale_));
343 if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(state1, state2)) { 706 if (!IsMatchingState(GetNonEmptyOf(p1, p2, kCountryCode), state1, state2) &&
707 CompareTokens(state1, state2) == DIFFERENT_TOKENS) {
344 return false; 708 return false;
345 } 709 }
346 710
347 // City 711 // City
348 // ------ 712 // ------
349 // Heuristic: If the match is between non-empty zip codes then we can infer 713 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words
714 // subset of the other.
715 //
716 // TODO(rogerm): If the match is between non-empty zip codes then we can infer
350 // that the two city strings are intended to have the same meaning. This 717 // that the two city strings are intended to have the same meaning. This
351 // handles the cases where we have a city vs one of its suburbs. Otherwise, we 718 // handles the cases where we have a city vs one of its suburbs.
352 // actually have to check if the cities map to the the same set of tokens.
353 const base::string16& city1 = NormalizeForComparison( 719 const base::string16& city1 = NormalizeForComparison(
354 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); 720 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));
355 const base::string16& city2 = NormalizeForComparison( 721 const base::string16& city2 = NormalizeForComparison(
356 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); 722 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));
357 if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(city1, city2)) { 723 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {
358 return false; 724 return false;
359 } 725 }
360 726
361 // Address 727 // Address
362 // -------- 728 // --------
363 // Heuristic: Use bag of words comparison on the post-normalized addresses. 729 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag
730 // of words subset of the other.
364 const base::string16& address1 = NormalizeForComparison( 731 const base::string16& address1 = NormalizeForComparison(
365 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); 732 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));
366 const base::string16& address2 = NormalizeForComparison( 733 const base::string16& address2 = NormalizeForComparison(
367 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); 734 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));
368 if (!HaveSameTokens(address1, address2)) { 735 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {
369 return false; 736 return false;
370 } 737 }
371 738
372 return true; 739 return true;
373 } 740 }
374 741
742 bool AutofillProfileComparator::IsMatchingState(
743 const base::string16& country_code,
744 const base::string16& state1,
745 const base::string16& state2) const {
746 if (state1 == state2)
747 return true;
748
749 if (country_code != kUS)
750 return false;
751
752 // TODO(rogerm): Generalize this to all locales using string equivalence rules.
753 base::string16 name, abbreviation;
754 autofill::state_names::GetNameAndAbbreviation(state1, &name, &abbreviation);
755 if (abbreviation.empty()) {
756 // state1 wasn't recognized. There's no need to compare it to state2.
757 return false;
758 }
759
760 return state2 == name || state2 == abbreviation;
761 }
762
375 } // namespace autofill 763 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698