OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/autofill/core/browser/autofill_profile_comparator.h" | 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/i18n/case_conversion.h" | 10 #include "base/i18n/case_conversion.h" |
11 #include "base/i18n/char_iterator.h" | 11 #include "base/i18n/char_iterator.h" |
12 #include "base/strings/string_piece.h" | 12 #include "base/strings/string_piece.h" |
13 #include "base/strings/string_split.h" | 13 #include "base/strings/string_split.h" |
14 #include "base/strings/string_util.h" | 14 #include "base/strings/string_util.h" |
15 #include "base/strings/utf_string_conversion_utils.h" | 15 #include "base/strings/utf_string_conversion_utils.h" |
16 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
| 17 #include "components/autofill/core/browser/address_rewriter.h" |
17 #include "components/autofill/core/browser/autofill_country.h" | 18 #include "components/autofill/core/browser/autofill_country.h" |
18 #include "components/autofill/core/browser/autofill_data_util.h" | 19 #include "components/autofill/core/browser/autofill_data_util.h" |
19 #include "components/autofill/core/browser/state_names.h" | 20 #include "components/autofill/core/browser/state_names.h" |
20 #include "third_party/libphonenumber/phonenumber_api.h" | 21 #include "third_party/libphonenumber/phonenumber_api.h" |
21 | 22 |
22 using i18n::phonenumbers::PhoneNumberUtil; | 23 using i18n::phonenumbers::PhoneNumberUtil; |
23 using base::UTF16ToUTF8; | 24 using base::UTF16ToUTF8; |
24 using base::UTF8ToUTF16; | 25 using base::UTF8ToUTF16; |
25 | 26 |
26 namespace autofill { | 27 namespace autofill { |
27 namespace { | 28 namespace { |
28 | 29 |
29 const base::char16 kSpace[] = {L' ', L'\0'}; | 30 const base::char16 kSpace[] = {L' ', L'\0'}; |
30 const base::char16 kUS[] = {L'U', L'S', L'\0'}; | |
31 | 31 |
32 bool ContainsNewline(base::StringPiece16 text) { | 32 bool ContainsNewline(base::StringPiece16 text) { |
33 return text.find('\n') != base::StringPiece16::npos; | 33 return text.find('\n') != base::StringPiece16::npos; |
34 } | 34 } |
35 | 35 |
36 std::ostream& operator<<(std::ostream& os, | 36 std::ostream& operator<<(std::ostream& os, |
37 const ::i18n::phonenumbers::PhoneNumber& n) { | 37 const ::i18n::phonenumbers::PhoneNumber& n) { |
38 os << "country_code: " << n.country_code() << " " | 38 os << "country_code: " << n.country_code() << " " |
39 << "national_number: " << n.national_number(); | 39 << "national_number: " << n.national_number(); |
40 if (n.has_extension()) | 40 if (n.has_extension()) |
(...skipping 316 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
357 const base::string16& state2 = p2.GetInfo(kState, app_locale_); | 357 const base::string16& state2 = p2.GetInfo(kState, app_locale_); |
358 if (state1.empty()) { | 358 if (state1.empty()) { |
359 address->SetInfo(kState, state2, app_locale_); | 359 address->SetInfo(kState, state2, app_locale_); |
360 } else if (state2.empty()) { | 360 } else if (state2.empty()) { |
361 address->SetInfo(kState, state1, app_locale_); | 361 address->SetInfo(kState, state1, app_locale_); |
362 } else { | 362 } else { |
363 address->SetInfo(kState, (state2.size() < state1.size() ? state2 : state1), | 363 address->SetInfo(kState, (state2.size() < state1.size() ? state2 : state1), |
364 app_locale_); | 364 app_locale_); |
365 } | 365 } |
366 | 366 |
| 367 AddressRewriter rewriter = AddressRewriter::ForCountryCode(country_code); |
| 368 |
367 // One of the cities is empty or one of the cities has a subset of tokens from | 369 // One of the cities is empty or one of the cities has a subset of tokens from |
368 // the other. Pick the city name with more tokens; this is usually the most | 370 // the other. Pick the city name with more tokens; this is usually the most |
369 // explicit one. | 371 // explicit one. |
370 const AutofillType kCity(ADDRESS_HOME_CITY); | 372 const AutofillType kCity(ADDRESS_HOME_CITY); |
371 const base::string16& city1 = p1.GetInfo(kCity, app_locale_); | 373 const base::string16& city1 = p1.GetInfo(kCity, app_locale_); |
372 const base::string16& city2 = p2.GetInfo(kCity, app_locale_); | 374 const base::string16& city2 = p2.GetInfo(kCity, app_locale_); |
373 if (city1.empty()) { | 375 if (city1.empty()) { |
374 address->SetInfo(kCity, city2, app_locale_); | 376 address->SetInfo(kCity, city2, app_locale_); |
375 } else if (city2.empty()) { | 377 } else if (city2.empty()) { |
376 address->SetInfo(kCity, city1, app_locale_); | 378 address->SetInfo(kCity, city1, app_locale_); |
377 } else { | 379 } else { |
378 // Prefer the one with more tokens. | 380 // Prefer the one with more tokens, making sure to apply address |
379 CompareTokensResult result = CompareTokens(NormalizeForComparison(city1), | 381 // normalization and rewriting before doing the comparison. |
380 NormalizeForComparison(city2)); | 382 CompareTokensResult result = |
| 383 CompareTokens(rewriter.Rewrite(NormalizeForComparison(city1)), |
| 384 rewriter.Rewrite(NormalizeForComparison(city2))); |
381 switch (result) { | 385 switch (result) { |
382 case SAME_TOKENS: | 386 case SAME_TOKENS: |
383 // They have the same set of unique tokens. Let's pick the more recently | 387 // They have the same set of unique tokens. Let's pick the more recently |
384 // used one. | 388 // used one. |
385 address->SetInfo(kCity, (p2.use_date() > p1.use_date() ? city2 : city1), | 389 address->SetInfo(kCity, (p2.use_date() > p1.use_date() ? city2 : city1), |
386 app_locale_); | 390 app_locale_); |
387 break; | 391 break; |
388 case S1_CONTAINS_S2: | 392 case S1_CONTAINS_S2: |
389 // city1 has more unique tokens than city2. | 393 // city1 has more unique tokens than city2. |
390 address->SetInfo(kCity, city1, app_locale_); | 394 address->SetInfo(kCity, city1, app_locale_); |
(...skipping 24 matching lines...) Expand all Loading... |
415 } else { | 419 } else { |
416 // Prefer the multi-line address if one is multi-line and the other isn't. | 420 // Prefer the multi-line address if one is multi-line and the other isn't. |
417 bool address1_multiline = ContainsNewline(address1); | 421 bool address1_multiline = ContainsNewline(address1); |
418 bool address2_multiline = ContainsNewline(address2); | 422 bool address2_multiline = ContainsNewline(address2); |
419 if (address1_multiline && !address2_multiline) { | 423 if (address1_multiline && !address2_multiline) { |
420 address->SetInfo(kStreetAddress, address1, app_locale_); | 424 address->SetInfo(kStreetAddress, address1, app_locale_); |
421 } else if (address2_multiline && !address1_multiline) { | 425 } else if (address2_multiline && !address1_multiline) { |
422 address->SetInfo(kStreetAddress, address2, app_locale_); | 426 address->SetInfo(kStreetAddress, address2, app_locale_); |
423 } else { | 427 } else { |
424 // Prefer the one with more tokens if they're both single-line or both | 428 // Prefer the one with more tokens if they're both single-line or both |
425 // multi-line addresses. | 429 // multi-line addresses, making sure to apply address normalization and |
426 CompareTokensResult result = CompareTokens( | 430 // rewriting before doing the comparison. |
427 NormalizeForComparison(address1), NormalizeForComparison(address2)); | 431 CompareTokensResult result = |
| 432 CompareTokens(rewriter.Rewrite(NormalizeForComparison(address1)), |
| 433 rewriter.Rewrite(NormalizeForComparison(address2))); |
428 switch (result) { | 434 switch (result) { |
429 case SAME_TOKENS: | 435 case SAME_TOKENS: |
430 // They have the same set of unique tokens. Let's pick the more | 436 // They have the same set of unique tokens. Let's pick the more |
431 // recently used one. | 437 // recently used one. |
432 address->SetInfo( | 438 address->SetInfo( |
433 kStreetAddress, | 439 kStreetAddress, |
434 (p2.use_date() > p1.use_date() ? address2 : address1), | 440 (p2.use_date() > p1.use_date() ? address2 : address1), |
435 app_locale_); | 441 app_locale_); |
436 break; | 442 break; |
437 case S1_CONTAINS_S2: | 443 case S1_CONTAINS_S2: |
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
682 const base::string16& zip1 = NormalizeForComparison( | 688 const base::string16& zip1 = NormalizeForComparison( |
683 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); | 689 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
684 const base::string16& zip2 = NormalizeForComparison( | 690 const base::string16& zip2 = NormalizeForComparison( |
685 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); | 691 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
686 if (!zip1.empty() && !zip2.empty() && | 692 if (!zip1.empty() && !zip2.empty() && |
687 zip1.find(zip2) == base::string16::npos && | 693 zip1.find(zip2) == base::string16::npos && |
688 zip2.find(zip1) == base::string16::npos) { | 694 zip2.find(zip1) == base::string16::npos) { |
689 return false; | 695 return false; |
690 } | 696 } |
691 | 697 |
| 698 AddressRewriter rewriter = |
| 699 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1); |
| 700 |
692 // State | 701 // State |
693 // ------ | 702 // ------ |
694 // Heuristic: States are mergeable if one is a (possibly empty) bag of words | 703 // Heuristic: States are mergeable if one is a (possibly empty) bag of words |
695 // subset of the other. | 704 // subset of the other. |
696 // | 705 // |
697 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | 706 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
698 // that the two state strings are intended to have the same meaning. This | 707 // that the two state strings are intended to have the same meaning. This |
699 // handles the cases where we have invalid or poorly formed data in one of the | 708 // handles the cases where we have invalid or poorly formed data in one of the |
700 // state values (like "Select one", or "CA - California"). | 709 // state values (like "Select one", or "CA - California"). |
701 const AutofillType kState(ADDRESS_HOME_STATE); | 710 const AutofillType kState(ADDRESS_HOME_STATE); |
702 const base::string16& state1 = | 711 const base::string16& state1 = |
703 NormalizeForComparison(p1.GetInfo(kState, app_locale_)); | 712 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_))); |
704 const base::string16& state2 = | 713 const base::string16& state2 = |
705 NormalizeForComparison(p2.GetInfo(kState, app_locale_)); | 714 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_))); |
706 if (!IsMatchingState(GetNonEmptyOf(p1, p2, kCountryCode), state1, state2) && | 715 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { |
707 CompareTokens(state1, state2) == DIFFERENT_TOKENS) { | |
708 return false; | 716 return false; |
709 } | 717 } |
710 | 718 |
711 // City | 719 // City |
712 // ------ | 720 // ------ |
713 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words | 721 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words |
714 // subset of the other. | 722 // subset of the other. |
715 // | 723 // |
716 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | 724 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
717 // that the two city strings are intended to have the same meaning. This | 725 // that the two city strings are intended to have the same meaning. This |
718 // handles the cases where we have a city vs one of its suburbs. | 726 // handles the cases where we have a city vs one of its suburbs. |
719 const base::string16& city1 = NormalizeForComparison( | 727 const base::string16& city1 = rewriter.Rewrite(NormalizeForComparison( |
720 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); | 728 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); |
721 const base::string16& city2 = NormalizeForComparison( | 729 const base::string16& city2 = rewriter.Rewrite(NormalizeForComparison( |
722 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); | 730 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); |
723 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { | 731 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { |
724 return false; | 732 return false; |
725 } | 733 } |
726 | 734 |
727 // Address | 735 // Address |
728 // -------- | 736 // -------- |
729 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag | 737 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag |
730 // of words subset of the other. | 738 // of words subset of the other. |
731 const base::string16& address1 = NormalizeForComparison( | 739 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison( |
732 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); | 740 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); |
733 const base::string16& address2 = NormalizeForComparison( | 741 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison( |
734 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); | 742 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); |
735 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { | 743 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { |
736 return false; | 744 return false; |
737 } | 745 } |
738 | 746 |
739 return true; | 747 return true; |
740 } | 748 } |
741 | 749 |
742 bool AutofillProfileComparator::IsMatchingState( | |
743 const base::string16& country_code, | |
744 const base::string16& state1, | |
745 const base::string16& state2) const { | |
746 if (state1 == state2) | |
747 return true; | |
748 | |
749 if (country_code != kUS) | |
750 return false; | |
751 | |
752 // TODO(rogerm): Generalize this to all locals using string equivalence rules. | |
753 base::string16 name, abbreviation; | |
754 autofill::state_names::GetNameAndAbbreviation(state1, &name, &abbreviation); | |
755 if (abbreviation.empty()) { | |
756 // state1 wasn't recognized. There's no need to compare it to state2 | |
757 return false; | |
758 } | |
759 | |
760 return state2 == name || state2 == abbreviation; | |
761 } | |
762 | |
763 } // namespace autofill | 750 } // namespace autofill |
OLD | NEW |