Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(296)

Side by Side Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2137533002: Embed address normalization rewriting rules. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/core/browser/autofill_profile_comparator.h" 5 #include "components/autofill/core/browser/autofill_profile_comparator.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/i18n/case_conversion.h" 10 #include "base/i18n/case_conversion.h"
11 #include "base/i18n/char_iterator.h" 11 #include "base/i18n/char_iterator.h"
12 #include "base/strings/string_piece.h" 12 #include "base/strings/string_piece.h"
13 #include "base/strings/string_split.h" 13 #include "base/strings/string_split.h"
14 #include "base/strings/string_util.h" 14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversion_utils.h" 15 #include "base/strings/utf_string_conversion_utils.h"
16 #include "base/strings/utf_string_conversions.h" 16 #include "base/strings/utf_string_conversions.h"
17 #include "components/autofill/core/browser/address_rewriter.h"
17 #include "components/autofill/core/browser/autofill_country.h" 18 #include "components/autofill/core/browser/autofill_country.h"
18 #include "components/autofill/core/browser/autofill_data_util.h" 19 #include "components/autofill/core/browser/autofill_data_util.h"
19 #include "components/autofill/core/browser/state_names.h" 20 #include "components/autofill/core/browser/state_names.h"
20 #include "third_party/libphonenumber/phonenumber_api.h" 21 #include "third_party/libphonenumber/phonenumber_api.h"
21 22
22 using i18n::phonenumbers::PhoneNumberUtil; 23 using i18n::phonenumbers::PhoneNumberUtil;
23 using base::UTF16ToUTF8; 24 using base::UTF16ToUTF8;
24 using base::UTF8ToUTF16; 25 using base::UTF8ToUTF16;
25 26
26 namespace autofill { 27 namespace autofill {
27 namespace { 28 namespace {
28 29
29 const base::char16 kSpace[] = {L' ', L'\0'}; 30 const base::char16 kSpace[] = {L' ', L'\0'};
30 const base::char16 kUS[] = {L'U', L'S', L'\0'};
31 31
32 bool ContainsNewline(base::StringPiece16 text) { 32 bool ContainsNewline(base::StringPiece16 text) {
33 return text.find('\n') != base::StringPiece16::npos; 33 return text.find('\n') != base::StringPiece16::npos;
34 } 34 }
35 35
36 std::ostream& operator<<(std::ostream& os, 36 std::ostream& operator<<(std::ostream& os,
37 const ::i18n::phonenumbers::PhoneNumber& n) { 37 const ::i18n::phonenumbers::PhoneNumber& n) {
38 os << "country_code: " << n.country_code() << " " 38 os << "country_code: " << n.country_code() << " "
39 << "national_number: " << n.national_number(); 39 << "national_number: " << n.national_number();
40 if (n.has_extension()) 40 if (n.has_extension())
(...skipping 316 matching lines...) Expand 10 before | Expand all | Expand 10 after
357 const base::string16& state2 = p2.GetInfo(kState, app_locale_); 357 const base::string16& state2 = p2.GetInfo(kState, app_locale_);
358 if (state1.empty()) { 358 if (state1.empty()) {
359 address->SetInfo(kState, state2, app_locale_); 359 address->SetInfo(kState, state2, app_locale_);
360 } else if (state2.empty()) { 360 } else if (state2.empty()) {
361 address->SetInfo(kState, state1, app_locale_); 361 address->SetInfo(kState, state1, app_locale_);
362 } else { 362 } else {
363 address->SetInfo(kState, (state2.size() < state1.size() ? state2 : state1), 363 address->SetInfo(kState, (state2.size() < state1.size() ? state2 : state1),
364 app_locale_); 364 app_locale_);
365 } 365 }
366 366
367 AddressRewriter rewriter = AddressRewriter::ForCountryCode(country_code);
368
367 // One of the cities is empty or one of the cities has a subset of tokens from 369 // One of the cities is empty or one of the cities has a subset of tokens from
368 // the other. Pick the city name with more tokens; this is usually the most 370 // the other. Pick the city name with more tokens; this is usually the most
369 // explicit one. 371 // explicit one.
370 const AutofillType kCity(ADDRESS_HOME_CITY); 372 const AutofillType kCity(ADDRESS_HOME_CITY);
371 const base::string16& city1 = p1.GetInfo(kCity, app_locale_); 373 const base::string16& city1 = p1.GetInfo(kCity, app_locale_);
372 const base::string16& city2 = p2.GetInfo(kCity, app_locale_); 374 const base::string16& city2 = p2.GetInfo(kCity, app_locale_);
373 if (city1.empty()) { 375 if (city1.empty()) {
374 address->SetInfo(kCity, city2, app_locale_); 376 address->SetInfo(kCity, city2, app_locale_);
375 } else if (city2.empty()) { 377 } else if (city2.empty()) {
376 address->SetInfo(kCity, city1, app_locale_); 378 address->SetInfo(kCity, city1, app_locale_);
377 } else { 379 } else {
378 // Prefer the one with more tokens. 380 // Prefer the one with more tokens, making sure to apply address
379 CompareTokensResult result = CompareTokens(NormalizeForComparison(city1), 381 // normalization and rewriting before doing the comparison.
380 NormalizeForComparison(city2)); 382 CompareTokensResult result =
383 CompareTokens(rewriter.Rewrite(NormalizeForComparison(city1)),
384 rewriter.Rewrite(NormalizeForComparison(city2)));
381 switch (result) { 385 switch (result) {
382 case SAME_TOKENS: 386 case SAME_TOKENS:
383 // They have the same set of unique tokens. Let's pick the more recently 387 // They have the same set of unique tokens. Let's pick the more recently
384 // used one. 388 // used one.
385 address->SetInfo(kCity, (p2.use_date() > p1.use_date() ? city2 : city1), 389 address->SetInfo(kCity, (p2.use_date() > p1.use_date() ? city2 : city1),
386 app_locale_); 390 app_locale_);
387 break; 391 break;
388 case S1_CONTAINS_S2: 392 case S1_CONTAINS_S2:
389 // city1 has more unique tokens than city2. 393 // city1 has more unique tokens than city2.
390 address->SetInfo(kCity, city1, app_locale_); 394 address->SetInfo(kCity, city1, app_locale_);
(...skipping 24 matching lines...) Expand all
415 } else { 419 } else {
416 // Prefer the multi-line address if one is multi-line and the other isn't. 420 // Prefer the multi-line address if one is multi-line and the other isn't.
417 bool address1_multiline = ContainsNewline(address1); 421 bool address1_multiline = ContainsNewline(address1);
418 bool address2_multiline = ContainsNewline(address2); 422 bool address2_multiline = ContainsNewline(address2);
419 if (address1_multiline && !address2_multiline) { 423 if (address1_multiline && !address2_multiline) {
420 address->SetInfo(kStreetAddress, address1, app_locale_); 424 address->SetInfo(kStreetAddress, address1, app_locale_);
421 } else if (address2_multiline && !address1_multiline) { 425 } else if (address2_multiline && !address1_multiline) {
422 address->SetInfo(kStreetAddress, address2, app_locale_); 426 address->SetInfo(kStreetAddress, address2, app_locale_);
423 } else { 427 } else {
424 // Prefer the one with more tokens if they're both single-line or both 428 // Prefer the one with more tokens if they're both single-line or both
425 // multi-line addresses. 429 // multi-line addresses, making sure to apply address normalization and
426 CompareTokensResult result = CompareTokens( 430 // rewriting before doing the comparison.
427 NormalizeForComparison(address1), NormalizeForComparison(address2)); 431 CompareTokensResult result =
432 CompareTokens(rewriter.Rewrite(NormalizeForComparison(address1)),
433 rewriter.Rewrite(NormalizeForComparison(address2)));
428 switch (result) { 434 switch (result) {
429 case SAME_TOKENS: 435 case SAME_TOKENS:
430 // They have the same set of unique tokens. Let's pick the more recently 436 // They have the same set of unique tokens. Let's pick the more recently
431 // used one. 437 // used one.
432 address->SetInfo( 438 address->SetInfo(
433 kStreetAddress, 439 kStreetAddress,
434 (p2.use_date() > p1.use_date() ? address2 : address1), 440 (p2.use_date() > p1.use_date() ? address2 : address1),
435 app_locale_); 441 app_locale_);
436 break; 442 break;
437 case S1_CONTAINS_S2: 443 case S1_CONTAINS_S2:
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
682 const base::string16& zip1 = NormalizeForComparison( 688 const base::string16& zip1 = NormalizeForComparison(
683 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); 689 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
684 const base::string16& zip2 = NormalizeForComparison( 690 const base::string16& zip2 = NormalizeForComparison(
685 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); 691 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);
686 if (!zip1.empty() && !zip2.empty() && 692 if (!zip1.empty() && !zip2.empty() &&
687 zip1.find(zip2) == base::string16::npos && 693 zip1.find(zip2) == base::string16::npos &&
688 zip2.find(zip1) == base::string16::npos) { 694 zip2.find(zip1) == base::string16::npos) {
689 return false; 695 return false;
690 } 696 }
691 697
698 AddressRewriter rewriter =
699 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1);
700
692 // State 701 // State
693 // ------ 702 // ------
694 // Heuristic: States are mergeable if one is a (possibly empty) bag of words 703 // Heuristic: States are mergeable if one is a (possibly empty) bag of words
695 // subset of the other. 704 // subset of the other.
696 // 705 //
697 // TODO(rogerm): If the match is between non-empty zip codes then we can infer 706 // TODO(rogerm): If the match is between non-empty zip codes then we can infer
698 // that the two state strings are intended to have the same meaning. This 707 // that the two state strings are intended to have the same meaning. This
699 // handles the cases where we have invalid or poorly formed data in one of the 708 // handles the cases where we have invalid or poorly formed data in one of the
700 // state values (like "Select one", or "CA - California"). 709 // state values (like "Select one", or "CA - California").
701 const AutofillType kState(ADDRESS_HOME_STATE); 710 const AutofillType kState(ADDRESS_HOME_STATE);
702 const base::string16& state1 = 711 const base::string16& state1 =
703 NormalizeForComparison(p1.GetInfo(kState, app_locale_)); 712 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_)));
704 const base::string16& state2 = 713 const base::string16& state2 =
705 NormalizeForComparison(p2.GetInfo(kState, app_locale_)); 714 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_)));
706 if (!IsMatchingState(GetNonEmptyOf(p1, p2, kCountryCode), state1, state2) && 715 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) {
707 CompareTokens(state1, state2) == DIFFERENT_TOKENS) {
708 return false; 716 return false;
709 } 717 }
710 718
711 // City 719 // City
712 // ------ 720 // ------
713 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words 721 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words
714 // subset of the other. 722 // subset of the other.
715 // 723 //
716 // TODO(rogerm): If the match is between non-empty zip codes then we can infer 724 // TODO(rogerm): If the match is between non-empty zip codes then we can infer
717 // that the two city strings are intended to have the same meaning. This 725 // that the two city strings are intended to have the same meaning. This
718 // handles the cases where we have a city vs one of its suburbs. 726 // handles the cases where we have a city vs one of its suburbs.
719 const base::string16& city1 = NormalizeForComparison( 727 const base::string16& city1 = rewriter.Rewrite(NormalizeForComparison(
720 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); 728 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)));
721 const base::string16& city2 = NormalizeForComparison( 729 const base::string16& city2 = rewriter.Rewrite(NormalizeForComparison(
722 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); 730 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)));
723 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { 731 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {
724 return false; 732 return false;
725 } 733 }
726 734
727 // Address 735 // Address
728 // -------- 736 // --------
729 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag 737 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag
730 // of words subset of the other. 738 // of words subset of the other.
731 const base::string16& address1 = NormalizeForComparison( 739 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison(
732 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); 740 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)));
733 const base::string16& address2 = NormalizeForComparison( 741 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison(
734 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); 742 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)));
735 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { 743 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {
736 return false; 744 return false;
737 } 745 }
738 746
739 return true; 747 return true;
740 } 748 }
741 749
742 bool AutofillProfileComparator::IsMatchingState(
743 const base::string16& country_code,
744 const base::string16& state1,
745 const base::string16& state2) const {
746 if (state1 == state2)
747 return true;
748
749 if (country_code != kUS)
750 return false;
751
752 // TODO(rogerm): Generalize this to all locales using string equivalence rules.
753 base::string16 name, abbreviation;
754 autofill::state_names::GetNameAndAbbreviation(state1, &name, &abbreviation);
755 if (abbreviation.empty()) {
756 // state1 wasn't recognized. There's no need to compare it to state2
757 return false;
758 }
759
760 return state2 == name || state2 == abbreviation;
761 }
762
763 } // namespace autofill 750 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698