Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" | 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/i18n/case_conversion.h" | 10 #include "base/i18n/case_conversion.h" |
| (...skipping 298 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 309 const base::string16* best = nullptr; | 309 const base::string16* best = nullptr; |
| 310 | 310 |
| 311 DCHECK(HaveMergeableCompanyNames(p1, p2)) | 311 DCHECK(HaveMergeableCompanyNames(p1, p2)) |
| 312 << "Company names are not mergeable: '" << c1 << "' vs '" << c2 << "'"; | 312 << "Company names are not mergeable: '" << c1 << "' vs '" << c2 << "'"; |
| 313 | 313 |
| 314 CompareTokensResult result = | 314 CompareTokensResult result = |
| 315 CompareTokens(NormalizeForComparison(c1), NormalizeForComparison(c2)); | 315 CompareTokens(NormalizeForComparison(c1), NormalizeForComparison(c2)); |
| 316 switch (result) { | 316 switch (result) { |
| 317 case DIFFERENT_TOKENS: | 317 case DIFFERENT_TOKENS: |
| 318 default: | 318 default: |
| 319 NOTREACHED(); | 319 NOTREACHED() << "Unexpected mismatch: '" << c1 << "' vs '" << c2 << "'"; |
| 320 return false; | 320 return false; |
| 321 case S1_CONTAINS_S2: | 321 case S1_CONTAINS_S2: |
| 322 best = &c1; | 322 best = &c1; |
| 323 break; | 323 break; |
| 324 case S2_CONTAINS_S1: | 324 case S2_CONTAINS_S1: |
| 325 best = &c2; | 325 best = &c2; |
| 326 break; | 326 break; |
| 327 case SAME_TOKENS: | 327 case SAME_TOKENS: |
| 328 best = p2.use_date() > p1.use_date() ? &c2 : &c1; | 328 best = p2.use_date() > p1.use_date() ? &c2 : &c1; |
| 329 break; | 329 break; |
| (...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 494 case S1_CONTAINS_S2: | 494 case S1_CONTAINS_S2: |
| 495 // city1 has more unique tokens than city2. | 495 // city1 has more unique tokens than city2. |
| 496 address->SetInfo(kCity, city1, app_locale_); | 496 address->SetInfo(kCity, city1, app_locale_); |
| 497 break; | 497 break; |
| 498 case S2_CONTAINS_S1: | 498 case S2_CONTAINS_S1: |
| 499 // city2 has more unique tokens than city1. | 499 // city2 has more unique tokens than city1. |
| 500 address->SetInfo(kCity, city2, app_locale_); | 500 address->SetInfo(kCity, city2, app_locale_); |
| 501 break; | 501 break; |
| 502 case DIFFERENT_TOKENS: | 502 case DIFFERENT_TOKENS: |
| 503 default: | 503 default: |
| 504 // The addresses aren't mergeable and we shouldn't be doing any of | 504 // The cities aren't mergeable and we shouldn't be doing any of |
| 505 // this. | 505 // this. |
| 506 NOTREACHED(); | 506 NOTREACHED() << "Unexpected mismatch: '" << city1 << "' vs '" << city2 |
| 507 << "'"; | |
| 507 return false; | 508 return false; |
| 508 } | 509 } |
| 509 } | 510 } |
| 510 | 511 |
| 512 // One of the dependent localities is empty or one of the localities has a | |
| 513 // subset of tokens from the other. Pick the locality name with more tokens; | |
| 514 // this is usually the most explicit one. | |
| 515 const AutofillType kDependentLocality(ADDRESS_HOME_DEPENDENT_LOCALITY); | |
| 516 const base::string16& locality1 = p1.GetInfo(kDependentLocality, app_locale_); | |
| 517 const base::string16& locality2 = p2.GetInfo(kDependentLocality, app_locale_); | |
| 518 if (locality1.empty()) { | |
| 519 address->SetInfo(kDependentLocality, locality2, app_locale_); | |
| 520 } else if (locality2.empty()) { | |
| 521 address->SetInfo(kDependentLocality, locality1, app_locale_); | |
| 522 } else { | |
| 523 // Prefer the one with more tokens, making sure to apply address | |
| 524 // normalization and rewriting before doing the comparison. | |
| 525 CompareTokensResult result = | |
| 526 CompareTokens(rewriter.Rewrite(NormalizeForComparison(locality1)), | |
| 527 rewriter.Rewrite(NormalizeForComparison(locality2))); | |
| 528 switch (result) { | |
| 529 case SAME_TOKENS: | |
| 530 // They have the same set of unique tokens. Let's pick the more recently | |
| 531 // used one. | |
| 532 address->SetInfo( | |
| 533 kDependentLocality, | |
| 534 (p2.use_date() > p1.use_date() ? locality2 : locality1), | |
| 535 app_locale_); | |
| 536 break; | |
| 537 case S1_CONTAINS_S2: | |
| 538 // locality1 has more unique tokens than locality2. | |
| 539 address->SetInfo(kDependentLocality, locality1, app_locale_); | |
| 540 break; | |
| 541 case S2_CONTAINS_S1: | |
| 542 // locality2 has more unique tokens than locality1. | |
| 543 address->SetInfo(kDependentLocality, locality2, app_locale_); | |
| 544 break; | |
| 545 case DIFFERENT_TOKENS: | |
| 546 default: | |
| 547 // The localities aren't mergeable and we shouldn't be doing any of | |
| 548 // this. | |
| 549 NOTREACHED() << "Unexpected mismatch: '" << locality1 << "' vs '" | |
| 550 << locality2 << "'"; | |
| 551 return false; | |
| 552 } | |
| 553 } | |
| 554 | |
| 555 // One of the sorting codes is empty, they are the same, or one is a substring | |
| 556 // of the other. We prefer the most recently used sorting code. | |
| 557 const AutofillType kSortingCode(ADDRESS_HOME_SORTING_CODE); | |
| 558 const base::string16& sorting1 = p1.GetInfo(kSortingCode, app_locale_); | |
| 559 const base::string16& sorting2 = p2.GetInfo(kSortingCode, app_locale_); | |
| 560 if (sorting1.empty()) { | |
| 561 address->SetInfo(kSortingCode, sorting2, app_locale_); | |
| 562 } else if (sorting2.empty()) { | |
| 563 address->SetInfo(kSortingCode, sorting1, app_locale_); | |
| 564 } else { | |
| 565 address->SetInfo(kSortingCode, | |
| 566 (p2.use_date() > p1.use_date() ? sorting2 : sorting1), | |
| 567 app_locale_); | |
| 568 } | |
| 569 | |
| 511 // One of the addresses is empty or one of the addresses has a subset of | 570 // One of the addresses is empty or one of the addresses has a subset of |
| 512 // tokens from the other. Prefer the more verbosely expressed one. | 571 // tokens from the other. Prefer the more verbosely expressed one. |
| 513 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); | 572 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); |
| 514 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); | 573 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); |
| 515 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); | 574 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); |
| 516 // If one of the addresses is empty then use the other. | 575 // If one of the addresses is empty then use the other. |
| 517 if (address1.empty()) { | 576 if (address1.empty()) { |
| 518 address->SetInfo(kStreetAddress, address2, app_locale_); | 577 address->SetInfo(kStreetAddress, address2, app_locale_); |
| 519 } else if (address2.empty()) { | 578 } else if (address2.empty()) { |
| 520 address->SetInfo(kStreetAddress, address1, app_locale_); | 579 address->SetInfo(kStreetAddress, address1, app_locale_); |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 541 kStreetAddress, | 600 kStreetAddress, |
| 542 (p2.use_date() > p1.use_date() ? address2 : address1), | 601 (p2.use_date() > p1.use_date() ? address2 : address1), |
| 543 app_locale_); | 602 app_locale_); |
| 544 break; | 603 break; |
| 545 case S1_CONTAINS_S2: | 604 case S1_CONTAINS_S2: |
| 546 // address1 has more unique tokens than address2. | 605 // address1 has more unique tokens than address2. |
| 547 address->SetInfo(kStreetAddress, address1, app_locale_); | 606 address->SetInfo(kStreetAddress, address1, app_locale_); |
| 548 break; | 607 break; |
| 549 case S2_CONTAINS_S1: | 608 case S2_CONTAINS_S1: |
| 550 // address2 has more unique tokens than address1. | 609 // address2 has more unique tokens than address1. |
| 551 address->SetInfo(kStreetAddress, address1, app_locale_); | 610 address->SetInfo(kStreetAddress, address2, app_locale_); |
|
Roger McFarlane (Chromium)
2016/11/21 18:39:51
Latent copy-paste bug. Should have been address2 here.
sebsg
2016/11/21 18:48:44
Good catch :)
| |
| 552 break; | 611 break; |
| 553 case DIFFERENT_TOKENS: | 612 case DIFFERENT_TOKENS: |
| 554 default: | 613 default: |
| 555 // The addresses aren't mergeable and we shouldn't be doing any of | 614 // The addresses aren't mergeable and we shouldn't be doing any of |
| 556 // this. | 615 // this. |
| 557 NOTREACHED(); | 616 NOTREACHED() << "Unexpected mismatch: '" << address1 << "' vs '" |
| 617 << address2 << "'"; | |
| 558 return false; | 618 return false; |
| 559 } | 619 } |
| 560 } | 620 } |
| 561 } | 621 } |
| 562 return true; | 622 return true; |
| 563 } | 623 } |
| 564 | 624 |
| 565 // static | 625 // static |
| 566 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( | 626 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( |
| 567 base::StringPiece16 s) { | 627 base::StringPiece16 s) { |
| (...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 764 // SHORT_NSN_MATCH and just call that instead of accessing the underlying | 824 // SHORT_NSN_MATCH and just call that instead of accessing the underlying |
| 765 // utility library directly? | 825 // utility library directly? |
| 766 | 826 |
| 767 // The phone number util library needs the numbers in utf8. | 827 // The phone number util library needs the numbers in utf8. |
| 768 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1); | 828 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1); |
| 769 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2); | 829 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2); |
| 770 | 830 |
| 771 // Parse and compare the phone numbers. | 831 // Parse and compare the phone numbers. |
| 772 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance(); | 832 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance(); |
| 773 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) { | 833 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) { |
| 774 case PhoneNumberUtil::INVALID_NUMBER: | |
| 775 case PhoneNumberUtil::NO_MATCH: | |
| 776 return false; | |
| 777 case PhoneNumberUtil::SHORT_NSN_MATCH: | 834 case PhoneNumberUtil::SHORT_NSN_MATCH: |
| 778 case PhoneNumberUtil::NSN_MATCH: | 835 case PhoneNumberUtil::NSN_MATCH: |
| 779 case PhoneNumberUtil::EXACT_MATCH: | 836 case PhoneNumberUtil::EXACT_MATCH: |
| 780 return true; | 837 return true; |
| 838 case PhoneNumberUtil::INVALID_NUMBER: | |
| 839 case PhoneNumberUtil::NO_MATCH: | |
| 840 return false; | |
| 841 default: | |
| 842 NOTREACHED(); | |
| 843 return false; | |
| 781 } | 844 } |
| 782 | |
| 783 NOTREACHED(); | |
| 784 return false; | |
| 785 } | 845 } |
| 786 | 846 |
| 787 bool AutofillProfileComparator::HaveMergeableAddresses( | 847 bool AutofillProfileComparator::HaveMergeableAddresses( |
| 788 const AutofillProfile& p1, | 848 const AutofillProfile& p1, |
| 789 const AutofillProfile& p2) const { | 849 const AutofillProfile& p2) const { |
| 790 // If the addresses are not in the same country, then they're not the same. If | 850 // If the addresses are not in the same country, then they're not the same. If |
| 791 // one of the address countries is unknown/invalid the comparison continues. | 851 // one of the address countries is unknown/invalid the comparison continues. |
| 792 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); | 852 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| 793 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); | 853 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); |
| 794 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); | 854 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); |
| 795 if (!country1.empty() && !country2.empty() && | 855 if (!country1.empty() && !country2.empty() && |
| 796 !case_insensitive_compare_.StringsEqual(country1, country2)) { | 856 !case_insensitive_compare_.StringsEqual(country1, country2)) { |
| 797 return false; | 857 return false; |
| 798 } | 858 } |
| 799 | 859 |
| 800 // TODO(rogerm): Lookup the normalization rules for the (common) country of | |
| 801 // the address. The rules should be applied post NormalizeForComparison to | |
| 802 // the state, city, and address bag of words comparisons. | |
| 803 | |
| 804 // Zip | 860 // Zip |
| 805 // ---- | 861 // ---- |
| 806 // If the addresses are definitely not in the same zip/area code then we're | 862 // If the addresses are definitely not in the same zip/area code then we're |
| 807 // done. Otherwise, the comparison continues. | 863 // done. Otherwise, the comparison continues. |
| 808 const AutofillType kZipCode(ADDRESS_HOME_ZIP); | 864 const AutofillType kZipCode(ADDRESS_HOME_ZIP); |
| 809 const base::string16& zip1 = NormalizeForComparison( | 865 const base::string16& zip1 = NormalizeForComparison( |
| 810 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); | 866 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 811 const base::string16& zip2 = NormalizeForComparison( | 867 const base::string16& zip2 = NormalizeForComparison( |
| 812 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); | 868 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 813 if (!zip1.empty() && !zip2.empty() && | 869 if (!zip1.empty() && !zip2.empty() && |
| 814 zip1.find(zip2) == base::string16::npos && | 870 zip1.find(zip2) == base::string16::npos && |
| 815 zip2.find(zip1) == base::string16::npos) { | 871 zip2.find(zip1) == base::string16::npos) { |
| 816 return false; | 872 return false; |
| 817 } | 873 } |
| 818 | 874 |
| 875 // Use the token rewrite rules for the (common) country of the address to | |
| 876 // transform equivalent substrings to a representative token for comparison. | |
| 819 AddressRewriter rewriter = | 877 AddressRewriter rewriter = |
| 820 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1); | 878 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1); |
| 821 | 879 |
| 822 // State | 880 // State |
| 823 // ------ | 881 // ------ |
| 824 // Heuristic: States are mergeable if one is a (possibly empty) bag of words | 882 // Heuristic: States are mergeable if one is a (possibly empty) bag of words |
| 825 // subset of the other. | 883 // subset of the other. |
| 826 // | 884 // |
| 827 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | 885 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| 828 // that the two state strings are intended to have the same meaning. This | 886 // that the two state strings are intended to have the same meaning. This |
| 829 // handles the cases where we have invalid or poorly formed data in one of the | 887 // handles the cases where we have invalid or poorly formed data in one of the |
| 830 // state values (like "Select one", or "CA - California"). | 888 // state values (like "Select one", or "CA - California"). |
| 831 const AutofillType kState(ADDRESS_HOME_STATE); | 889 const AutofillType kState(ADDRESS_HOME_STATE); |
| 832 const base::string16& state1 = | 890 const base::string16& state1 = |
| 833 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_))); | 891 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_))); |
| 834 const base::string16& state2 = | 892 const base::string16& state2 = |
| 835 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_))); | 893 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_))); |
| 836 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { | 894 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { |
| 837 return false; | 895 return false; |
| 838 } | 896 } |
| 839 | 897 |
| 840 // City | 898 // City |
| 841 // ------ | 899 // ------ |
| 842 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words | 900 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words |
| 843 // subset of the other. | 901 // subset of the other. |
| 844 // | 902 // |
| 845 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | 903 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| 846 // that the two city strings are intended to have the same meaning. This | 904 // that the two city strings are intended to have the same meaning. This |
| 847 // handles the cases where we have a city vs one of its suburbs. | 905 // handles the cases where we have a city vs one of its suburbs. |
| 848 const base::string16& city1 = rewriter.Rewrite(NormalizeForComparison( | 906 const AutofillType kCity(ADDRESS_HOME_CITY); |
| 849 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); | 907 const base::string16& city1 = |
| 850 const base::string16& city2 = rewriter.Rewrite(NormalizeForComparison( | 908 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kCity, app_locale_))); |
| 851 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); | 909 const base::string16& city2 = |
| 910 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kCity, app_locale_))); | |
| 852 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { | 911 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { |
| 853 return false; | 912 return false; |
| 854 } | 913 } |
| 855 | 914 |
| 915 // Dependent Locality | |
| 916 // ------------------- | |
| 917 // Heuristic: Dependent Localities are mergeable if one is a (possibly empty) | |
| 918 // bag of words subset of the other. | |
| 919 const AutofillType kDependentLocality(ADDRESS_HOME_DEPENDENT_LOCALITY); | |
| 920 const base::string16& locality1 = rewriter.Rewrite( | |
| 921 NormalizeForComparison(p1.GetInfo(kDependentLocality, app_locale_))); | |
| 922 const base::string16& locality2 = rewriter.Rewrite( | |
| 923 NormalizeForComparison(p2.GetInfo(kDependentLocality, app_locale_))); | |
| 924 if (CompareTokens(locality1, locality2) == DIFFERENT_TOKENS) { | |
| 925 return false; | |
| 926 } | |
| 927 | |
| 928 // Sorting Code | |
| 929 // ------------- | |
| 930 // Heuristic: Sorting codes are mergeable if one is empty or one is a | |
| 931 // substring of the other, post normalization and whitespace removed. This | |
| 932 // is similar to postal/zip codes. | |
| 933 const AutofillType kSortingCode(ADDRESS_HOME_SORTING_CODE); | |
| 934 const base::string16& sorting1 = NormalizeForComparison( | |
| 935 p1.GetInfo(kSortingCode, app_locale_), DISCARD_WHITESPACE); | |
| 936 const base::string16& sorting2 = NormalizeForComparison( | |
| 937 p2.GetInfo(kSortingCode, app_locale_), DISCARD_WHITESPACE); | |
| 938 if (!sorting1.empty() && !sorting2.empty() && | |
| 939 sorting1.find(sorting2) == base::string16::npos && | |
| 940 sorting2.find(sorting1) == base::string16::npos) { | |
| 941 return false; | |
| 942 } | |
| 943 | |
| 856 // Address | 944 // Address |
| 857 // -------- | 945 // -------- |
| 858 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag | 946 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag |
| 859 // of words subset of the other. | 947 // of words subset of the other. |
| 860 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison( | 948 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison( |
| 861 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); | 949 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); |
| 862 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison( | 950 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison( |
| 863 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); | 951 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); |
| 864 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { | 952 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { |
| 865 return false; | 953 return false; |
| 866 } | 954 } |
| 867 | 955 |
| 868 return true; | 956 return true; |
| 869 } | 957 } |
| 870 | 958 |
| 871 } // namespace autofill | 959 } // namespace autofill |
| OLD | NEW |