Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" | 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/i18n/case_conversion.h" | 10 #include "base/i18n/case_conversion.h" |
| (...skipping 490 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 501 break; | 501 break; |
| 502 case DIFFERENT_TOKENS: | 502 case DIFFERENT_TOKENS: |
| 503 default: | 503 default: |
| 504 // The addresses aren't mergeable and we shouldn't be doing any of | 504 // The addresses aren't mergeable and we shouldn't be doing any of |
| 505 // this. | 505 // this. |
| 506 NOTREACHED(); | 506 NOTREACHED(); |
| 507 return false; | 507 return false; |
| 508 } | 508 } |
| 509 } | 509 } |
| 510 | 510 |
| 511 // One of the dependent localities is empty or one of the localities has a | |
| 512 // subset of tokens from the other. Pick the locality name with more tokens; | |
| 513 // this is usually the most explicit one. | |
| 514 const AutofillType kDependentLocality(ADDRESS_HOME_DEPENDENT_LOCALITY); | |
| 515 const base::string16& locality1 = p1.GetInfo(kDependentLocality, app_locale_); | |
| 516 const base::string16& locality2 = p2.GetInfo(kDependentLocality, app_locale_); | |
| 517 if (locality1.empty()) { | |
| 518 address->SetInfo(kDependentLocality, locality2, app_locale_); | |
| 519 } else if (locality2.empty()) { | |
| 520 address->SetInfo(kDependentLocality, locality1, app_locale_); | |
| 521 } else { | |
| 522 // Prefer the one with more tokens, making sure to apply address | |
| 523 // normalization and rewriting before doing the comparison. | |
| 524 CompareTokensResult result = | |
| 525 CompareTokens(rewriter.Rewrite(NormalizeForComparison(locality1)), | |
| 526 rewriter.Rewrite(NormalizeForComparison(locality2))); | |
| 527 switch (result) { | |
| 528 case SAME_TOKENS: | |
| 529 // They have the same set of unique tokens. Let's pick the more recently | |
| 530 // used one. | |
| 531 address->SetInfo( | |
| 532 kDependentLocality, | |
| 533 (p2.use_date() > p1.use_date() ? locality2 : locality1), | |
| 534 app_locale_); | |
| 535 break; | |
| 536 case S1_CONTAINS_S2: | |
| 537 // locality1 has more unique tokens than locality2. | |
| 538 address->SetInfo(kDependentLocality, locality1, app_locale_); | |
| 539 break; | |
| 540 case S2_CONTAINS_S1: | |
| 541 // locality2 has more unique tokens than locality1. | |
| 542 address->SetInfo(kDependentLocality, locality2, app_locale_); | |
| 543 break; | |
| 544 case DIFFERENT_TOKENS: | |
| 545 default: | |
| 546 // The localities aren't mergeable and we shouldn't be doing any of | |
| 547 // this. | |
| 548 NOTREACHED(); | |
|
Mathieu
2016/11/12 13:02:30
suggestion: let's be more specific on the NOTREACH
Roger McFarlane (Chromium)
2016/11/21 18:39:51
Done. But note that this only applies to dev/debug
| |
| 549 return false; | |
| 550 } | |
| 551 } | |
| 552 | |
| 553 // One of the sorting codes is empty, they are the same, or one is a substring | |
| 554 // of the other. We prefer the most recently used sorting code. | |
| 555 const AutofillType kSortingCode(ADDRESS_HOME_SORTING_CODE); | |
| 556 const base::string16& sorting1 = p1.GetInfo(kSortingCode, app_locale_); | |
| 557 const base::string16& sorting2 = p2.GetInfo(kSortingCode, app_locale_); | |
| 558 if (sorting1.empty()) { | |
| 559 address->SetInfo(kSortingCode, sorting2, app_locale_); | |
| 560 } else if (sorting2.empty()) { | |
| 561 address->SetInfo(kSortingCode, sorting1, app_locale_); | |
| 562 } else { | |
| 563 address->SetInfo(kSortingCode, | |
| 564 (p2.use_date() > p1.use_date() ? sorting2 : sorting1), | |
| 565 app_locale_); | |
| 566 } | |
| 567 | |
| 511 // One of the addresses is empty or one of the addresses has a subset of | 568 // One of the addresses is empty or one of the addresses has a subset of |
| 512 // tokens from the other. Prefer the more verbosely expressed one. | 569 // tokens from the other. Prefer the more verbosely expressed one. |
| 513 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); | 570 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); |
| 514 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); | 571 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); |
| 515 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); | 572 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); |
| 516 // If one of the addresses is empty then use the other. | 573 // If one of the addresses is empty then use the other. |
| 517 if (address1.empty()) { | 574 if (address1.empty()) { |
| 518 address->SetInfo(kStreetAddress, address2, app_locale_); | 575 address->SetInfo(kStreetAddress, address2, app_locale_); |
| 519 } else if (address2.empty()) { | 576 } else if (address2.empty()) { |
| 520 address->SetInfo(kStreetAddress, address1, app_locale_); | 577 address->SetInfo(kStreetAddress, address1, app_locale_); |
| (...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 790 // If the addresses are not in the same country, then they're not the same. If | 847 // If the addresses are not in the same country, then they're not the same. If |
| 791 // one of the address countries is unknown/invalid the comparison continues. | 848 // one of the address countries is unknown/invalid the comparison continues. |
| 792 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); | 849 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| 793 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); | 850 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); |
| 794 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); | 851 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); |
| 795 if (!country1.empty() && !country2.empty() && | 852 if (!country1.empty() && !country2.empty() && |
| 796 !case_insensitive_compare_.StringsEqual(country1, country2)) { | 853 !case_insensitive_compare_.StringsEqual(country1, country2)) { |
| 797 return false; | 854 return false; |
| 798 } | 855 } |
| 799 | 856 |
| 800 // TODO(rogerm): Lookup the normalization rules for the (common) country of | |
| 801 // the address. The rules should be applied post NormalizeForComparison to | |
| 802 // the state, city, and address bag of words comparisons. | |
| 803 | |
| 804 // Zip | 857 // Zip |
| 805 // ---- | 858 // ---- |
| 806 // If the addresses are definitely not in the same zip/area code then we're | 859 // If the addresses are definitely not in the same zip/area code then we're |
| 807 // done. Otherwise, the comparison continues. | 860 // done. Otherwise, the comparison continues. |
| 808 const AutofillType kZipCode(ADDRESS_HOME_ZIP); | 861 const AutofillType kZipCode(ADDRESS_HOME_ZIP); |
| 809 const base::string16& zip1 = NormalizeForComparison( | 862 const base::string16& zip1 = NormalizeForComparison( |
| 810 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); | 863 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 811 const base::string16& zip2 = NormalizeForComparison( | 864 const base::string16& zip2 = NormalizeForComparison( |
| 812 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); | 865 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 813 if (!zip1.empty() && !zip2.empty() && | 866 if (!zip1.empty() && !zip2.empty() && |
| 814 zip1.find(zip2) == base::string16::npos && | 867 zip1.find(zip2) == base::string16::npos && |
| 815 zip2.find(zip1) == base::string16::npos) { | 868 zip2.find(zip1) == base::string16::npos) { |
| 816 return false; | 869 return false; |
| 817 } | 870 } |
| 818 | 871 |
| 872 // Use the token rewrite rules for the (common) country of the address to | |
| 873 // transform equivalent substrings to a representative token for comparison. | |
| 819 AddressRewriter rewriter = | 874 AddressRewriter rewriter = |
| 820 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1); | 875 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1); |
| 821 | 876 |
| 822 // State | 877 // State |
| 823 // ------ | 878 // ------ |
| 824 // Heuristic: States are mergeable if one is a (possibly empty) bag of words | 879 // Heuristic: States are mergeable if one is a (possibly empty) bag of words |
| 825 // subset of the other. | 880 // subset of the other. |
| 826 // | 881 // |
| 827 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | 882 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| 828 // that the two state strings are intended to have the same meaning. This | 883 // that the two state strings are intended to have the same meaning. This |
| 829 // handles the cases where we have invalid or poorly formed data in one of the | 884 // handles the cases where we have invalid or poorly formed data in one of the |
| 830 // state values (like "Select one", or "CA - California"). | 885 // state values (like "Select one", or "CA - California"). |
| 831 const AutofillType kState(ADDRESS_HOME_STATE); | 886 const AutofillType kState(ADDRESS_HOME_STATE); |
| 832 const base::string16& state1 = | 887 const base::string16& state1 = |
| 833 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_))); | 888 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_))); |
| 834 const base::string16& state2 = | 889 const base::string16& state2 = |
| 835 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_))); | 890 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_))); |
| 836 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { | 891 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { |
| 837 return false; | 892 return false; |
| 838 } | 893 } |
| 839 | 894 |
| 840 // City | 895 // City |
| 841 // ------ | 896 // ------ |
| 842 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words | 897 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words |
| 843 // subset of the other. | 898 // subset of the other. |
| 844 // | 899 // |
| 845 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | 900 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| 846 // that the two city strings are intended to have the same meaning. This | 901 // that the two city strings are intended to have the same meaning. This |
| 847 // handles the cases where we have a city vs one of its suburbs. | 902 // handles the cases where we have a city vs one of its suburbs. |
| 848 const base::string16& city1 = rewriter.Rewrite(NormalizeForComparison( | 903 const AutofillType kCity(ADDRESS_HOME_CITY); |
| 849 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); | 904 const base::string16& city1 = |
| 850 const base::string16& city2 = rewriter.Rewrite(NormalizeForComparison( | 905 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kCity, app_locale_))); |
| 851 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); | 906 const base::string16& city2 = |
| 907 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kCity, app_locale_))); | |
| 852 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { | 908 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { |
| 853 return false; | 909 return false; |
| 854 } | 910 } |
| 855 | 911 |
| 912 // Dependent Locality | |
| 913 // ------------------- | |
| 914 // Heuristic: Dependent Localities are mergeable if one is a (possibly empty) | |
| 915 // bag of words subset of the other. | |
| 916 const AutofillType kDependentLocality(ADDRESS_HOME_DEPENDENT_LOCALITY); | |
| 917 const base::string16& locality1 = rewriter.Rewrite( | |
| 918 NormalizeForComparison(p1.GetInfo(kDependentLocality, app_locale_))); | |
| 919 const base::string16& locality2 = rewriter.Rewrite( | |
| 920 NormalizeForComparison(p2.GetInfo(kDependentLocality, app_locale_))); | |
| 921 if (CompareTokens(locality1, locality2) == DIFFERENT_TOKENS) { | |
| 922 return false; | |
| 923 } | |
| 924 | |
| 925 // Sorting Code | |
| 926 // ------------- | |
| 927 // Heuristic: Sorting codes are mergeable if one is empty or one is a | |
| 928 // substring of the other, post normalization and whitespace removed. This | |
| 929 // is similar to postal/zip codes. | |
| 930 const AutofillType kSortingCode(ADDRESS_HOME_SORTING_CODE); | |
| 931 const base::string16& sorting1 = NormalizeForComparison( | |
| 932 p1.GetInfo(kSortingCode, app_locale_), DISCARD_WHITESPACE); | |
| 933 const base::string16& sorting2 = NormalizeForComparison( | |
| 934 p2.GetInfo(kSortingCode, app_locale_), DISCARD_WHITESPACE); | |
| 935 if (!sorting1.empty() && !sorting2.empty() && | |
| 936 sorting1.find(sorting2) == base::string16::npos && | |
| 937 sorting2.find(sorting1) == base::string16::npos) { | |
| 938 return false; | |
| 939 } | |
| 940 | |
| 856 // Address | 941 // Address |
| 857 // -------- | 942 // -------- |
| 858 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag | 943 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag |
| 859 // of words subset of the other. | 944 // of words subset of the other. |
| 860 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison( | 945 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison( |
| 861 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); | 946 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); |
| 862 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison( | 947 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison( |
| 863 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); | 948 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); |
| 864 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { | 949 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { |
| 865 return false; | 950 return false; |
| 866 } | 951 } |
| 867 | 952 |
| 868 return true; | 953 return true; |
| 869 } | 954 } |
| 870 | 955 |
| 871 } // namespace autofill | 956 } // namespace autofill |
| OLD | NEW |