Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2493253002: [autofill] Add address comparison/merge logic for dependent locality and sorting codes (Closed)
Patch Set: Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | components/autofill/core/browser/autofill_profile_comparator_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/core/browser/autofill_profile_comparator.h" 5 #include "components/autofill/core/browser/autofill_profile_comparator.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/i18n/case_conversion.h" 10 #include "base/i18n/case_conversion.h"
(...skipping 490 matching lines...) Expand 10 before | Expand all | Expand 10 after
501 break; 501 break;
502 case DIFFERENT_TOKENS: 502 case DIFFERENT_TOKENS:
503 default: 503 default:
504 // The addresses aren't mergeable and we shouldn't be doing any of 504 // The addresses aren't mergeable and we shouldn't be doing any of
505 // this. 505 // this.
506 NOTREACHED(); 506 NOTREACHED();
507 return false; 507 return false;
508 } 508 }
509 } 509 }
510 510
511 // One of the dependent localities is empty or one of the localities has a
512 // subset of tokens from the other. Pick the locality name with more tokens;
513 // this is usually the most explicit one.
514 const AutofillType kDependentLocality(ADDRESS_HOME_DEPENDENT_LOCALITY);
515 const base::string16& locality1 = p1.GetInfo(kDependentLocality, app_locale_);
516 const base::string16& locality2 = p2.GetInfo(kDependentLocality, app_locale_);
517 if (locality1.empty()) {
518 address->SetInfo(kDependentLocality, locality2, app_locale_);
519 } else if (locality2.empty()) {
520 address->SetInfo(kDependentLocality, locality1, app_locale_);
521 } else {
522 // Prefer the one with more tokens, making sure to apply address
523 // normalization and rewriting before doing the comparison.
524 CompareTokensResult result =
525 CompareTokens(rewriter.Rewrite(NormalizeForComparison(locality1)),
526 rewriter.Rewrite(NormalizeForComparison(locality2)));
527 switch (result) {
528 case SAME_TOKENS:
529 // They have the same set of unique tokens. Let's pick the more recently
530 // used one.
531 address->SetInfo(
532 kDependentLocality,
533 (p2.use_date() > p1.use_date() ? locality2 : locality1),
534 app_locale_);
535 break;
536 case S1_CONTAINS_S2:
537 // locality1 has more unique tokens than locality2.
538 address->SetInfo(kDependentLocality, locality1, app_locale_);
539 break;
540 case S2_CONTAINS_S1:
541 // locality2 has more unique tokens than locality1.
542 address->SetInfo(kDependentLocality, locality2, app_locale_);
543 break;
544 case DIFFERENT_TOKENS:
545 default:
546 // The localities aren't mergeable and we shouldn't be doing any of
547 // this.
548 NOTREACHED();
Mathieu 2016/11/12 13:02:30 suggestion: let's be more specific on the NOTREACH
Roger McFarlane (Chromium) 2016/11/21 18:39:51 Done. But note that this only applies to dev/debug
549 return false;
550 }
551 }
552
553 // One of the sorting codes is empty, they are the same, or one is a substring
554 // of the other. We prefer the most recently used sorting code.
555 const AutofillType kSortingCode(ADDRESS_HOME_SORTING_CODE);
556 const base::string16& sorting1 = p1.GetInfo(kSortingCode, app_locale_);
557 const base::string16& sorting2 = p2.GetInfo(kSortingCode, app_locale_);
558 if (sorting1.empty()) {
559 address->SetInfo(kSortingCode, sorting2, app_locale_);
560 } else if (sorting2.empty()) {
561 address->SetInfo(kSortingCode, sorting1, app_locale_);
562 } else {
563 address->SetInfo(kSortingCode,
564 (p2.use_date() > p1.use_date() ? sorting2 : sorting1),
565 app_locale_);
566 }
567
511 // One of the addresses is empty or one of the addresses has a subset of 568 // One of the addresses is empty or one of the addresses has a subset of
512 // tokens from the other. Prefer the more verbosely expressed one. 569 // tokens from the other. Prefer the more verbosely expressed one.
513 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); 570 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS);
514 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); 571 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_);
515 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); 572 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_);
516 // If one of the addresses is empty then use the other. 573 // If one of the addresses is empty then use the other.
517 if (address1.empty()) { 574 if (address1.empty()) {
518 address->SetInfo(kStreetAddress, address2, app_locale_); 575 address->SetInfo(kStreetAddress, address2, app_locale_);
519 } else if (address2.empty()) { 576 } else if (address2.empty()) {
520 address->SetInfo(kStreetAddress, address1, app_locale_); 577 address->SetInfo(kStreetAddress, address1, app_locale_);
(...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after
790 // If the addresses are not in the same country, then they're not the same. If 847 // If the addresses are not in the same country, then they're not the same. If
791 // one of the address countries is unknown/invalid the comparison continues. 848 // one of the address countries is unknown/invalid the comparison continues.
792 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); 849 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE);
793 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); 850 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_);
794 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); 851 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_);
795 if (!country1.empty() && !country2.empty() && 852 if (!country1.empty() && !country2.empty() &&
796 !case_insensitive_compare_.StringsEqual(country1, country2)) { 853 !case_insensitive_compare_.StringsEqual(country1, country2)) {
797 return false; 854 return false;
798 } 855 }
799 856
800 // TODO(rogerm): Lookup the normalization rules for the (common) country of
801 // the address. The rules should be applied post NormalizeForComparison to
802 // the state, city, and address bag of words comparisons.
803
804 // Zip 857 // Zip
805 // ---- 858 // ----
806 // If the addresses are definitely not in the same zip/area code then we're 859 // If the addresses are definitely not in the same zip/area code then we're
807 // done. Otherwise, the comparison continues. 860 // done. Otherwise, the comparison continues.
808 const AutofillType kZipCode(ADDRESS_HOME_ZIP); 861 const AutofillType kZipCode(ADDRESS_HOME_ZIP);
809 const base::string16& zip1 = NormalizeForComparison( 862 const base::string16& zip1 = NormalizeForComparison(
810 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); 863 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
811 const base::string16& zip2 = NormalizeForComparison( 864 const base::string16& zip2 = NormalizeForComparison(
812 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); 865 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
813 if (!zip1.empty() && !zip2.empty() && 866 if (!zip1.empty() && !zip2.empty() &&
814 zip1.find(zip2) == base::string16::npos && 867 zip1.find(zip2) == base::string16::npos &&
815 zip2.find(zip1) == base::string16::npos) { 868 zip2.find(zip1) == base::string16::npos) {
816 return false; 869 return false;
817 } 870 }
818 871
872 // Use the token rewrite rules for the (common) country of the address to
873 // transform equivalent substrings to a representative token for comparison.
819 AddressRewriter rewriter = 874 AddressRewriter rewriter =
820 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1); 875 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1);
821 876
822 // State 877 // State
823 // ------ 878 // ------
824 // Heuristic: States are mergeable if one is a (possibly empty) bag of words 879 // Heuristic: States are mergeable if one is a (possibly empty) bag of words
825 // subset of the other. 880 // subset of the other.
826 // 881 //
827 // TODO(rogerm): If the match is between non-empty zip codes then we can infer 882 // TODO(rogerm): If the match is between non-empty zip codes then we can infer
828 // that the two state strings are intended to have the same meaning. This 883 // that the two state strings are intended to have the same meaning. This
829 // handles the cases where we have invalid or poorly formed data in one of the 884 // handles the cases where we have invalid or poorly formed data in one of the
830 // state values (like "Select one", or "CA - California"). 885 // state values (like "Select one", or "CA - California").
831 const AutofillType kState(ADDRESS_HOME_STATE); 886 const AutofillType kState(ADDRESS_HOME_STATE);
832 const base::string16& state1 = 887 const base::string16& state1 =
833 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_))); 888 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_)));
834 const base::string16& state2 = 889 const base::string16& state2 =
835 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_))); 890 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_)));
836 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { 891 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) {
837 return false; 892 return false;
838 } 893 }
839 894
840 // City 895 // City
841 // ------ 896 // ------
842 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words 897 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words
843 // subset of the other. 898 // subset of the other.
844 // 899 //
845 // TODO(rogerm): If the match is between non-empty zip codes then we can infer 900 // TODO(rogerm): If the match is between non-empty zip codes then we can infer
846 // that the two city strings are intended to have the same meaning. This 901 // that the two city strings are intended to have the same meaning. This
847 // handles the cases where we have a city vs one of its suburbs. 902 // handles the cases where we have a city vs one of its suburbs.
848 const base::string16& city1 = rewriter.Rewrite(NormalizeForComparison( 903 const AutofillType kCity(ADDRESS_HOME_CITY);
849 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); 904 const base::string16& city1 =
850 const base::string16& city2 = rewriter.Rewrite(NormalizeForComparison( 905 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kCity, app_locale_)));
851 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_))); 906 const base::string16& city2 =
907 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kCity, app_locale_)));
852 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { 908 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {
853 return false; 909 return false;
854 } 910 }
855 911
912 // Dependent Locality
913 // -------------------
914 // Heuristic: Dependent Localities are mergeable if one is a (possibly empty)
915 // bag of words subset of the other.
916 const AutofillType kDependentLocality(ADDRESS_HOME_DEPENDENT_LOCALITY);
917 const base::string16& locality1 = rewriter.Rewrite(
918 NormalizeForComparison(p1.GetInfo(kDependentLocality, app_locale_)));
919 const base::string16& locality2 = rewriter.Rewrite(
920 NormalizeForComparison(p2.GetInfo(kDependentLocality, app_locale_)));
921 if (CompareTokens(locality1, locality2) == DIFFERENT_TOKENS) {
922 return false;
923 }
924
925 // Sorting Code
926 // -------------
927 // Heuristic: Sorting codes are mergeable if one is empty or one is a
928 // substring of the other, post normalization and whitespace removed. This
929 // is similar to postal/zip codes.
930 const AutofillType kSortingCode(ADDRESS_HOME_SORTING_CODE);
931 const base::string16& sorting1 = NormalizeForComparison(
932 p1.GetInfo(kSortingCode, app_locale_), DISCARD_WHITESPACE);
933 const base::string16& sorting2 = NormalizeForComparison(
934 p2.GetInfo(kSortingCode, app_locale_), DISCARD_WHITESPACE);
935 if (!sorting1.empty() && !sorting2.empty() &&
936 sorting1.find(sorting2) == base::string16::npos &&
937 sorting2.find(sorting1) == base::string16::npos) {
938 return false;
939 }
940
856 // Address 941 // Address
857 // -------- 942 // --------
858 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag 943 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag
859 // of words subset of the other. 944 // of words subset of the other.
860 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison( 945 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison(
861 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); 946 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)));
862 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison( 947 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison(
863 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_))); 948 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)));
864 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { 949 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {
865 return false; 950 return false;
866 } 951 }
867 952
868 return true; 953 return true;
869 } 954 }
870 955
871 } // namespace autofill 956 } // namespace autofill
OLDNEW
« no previous file with comments | « no previous file | components/autofill/core/browser/autofill_profile_comparator_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698