components/autofill/core/browser/autofill_profile_comparator.cc - Issue 2121253002: Embed address normalization rewriting rules.

Side by Side Diff: components/autofill/core/browser/autofill_profile_comparator.cc

Issue 2121253002: Embed address normalization rewriting rules. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: fix try bots Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« components/autofill/core/browser/address_rewriter.cc ('K') | « components/autofill/core/browser/autofill_profile_comparator.h ('k') | components/components_tests.gyp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/autofill/core/browser/autofill_profile_comparator.h"	5 #include "components/autofill/core/browser/autofill_profile_comparator.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <vector>	8 #include <vector>

9	9

10 #include "base/i18n/case_conversion.h"	10 #include "base/i18n/case_conversion.h"

11 #include "base/i18n/char_iterator.h"	11 #include "base/i18n/char_iterator.h"

12 #include "base/strings/string_piece.h"	12 #include "base/strings/string_piece.h"

13 #include "base/strings/string_split.h"	13 #include "base/strings/string_split.h"

14 #include "base/strings/string_util.h"	14 #include "base/strings/string_util.h"

15 #include "base/strings/utf_string_conversion_utils.h"	15 #include "base/strings/utf_string_conversion_utils.h"

16 #include "base/strings/utf_string_conversions.h"	16 #include "base/strings/utf_string_conversions.h"

	17 #include "components/autofill/core/browser/address_rewriter.h"

17 #include "components/autofill/core/browser/autofill_country.h"	18 #include "components/autofill/core/browser/autofill_country.h"

18 #include "components/autofill/core/browser/autofill_data_util.h"	19 #include "components/autofill/core/browser/autofill_data_util.h"

19 #include "components/autofill/core/browser/state_names.h"	20 #include "components/autofill/core/browser/state_names.h"

20 #include "third_party/libphonenumber/phonenumber_api.h"	21 #include "third_party/libphonenumber/phonenumber_api.h"

21	22

22 using i18n::phonenumbers::PhoneNumberUtil;	23 using i18n::phonenumbers::PhoneNumberUtil;

23 using base::UTF16ToUTF8;	24 using base::UTF16ToUTF8;

24 using base::UTF8ToUTF16;	25 using base::UTF8ToUTF16;

25	26

26 namespace autofill {	27 namespace autofill {

27 namespace {	28 namespace {

28	29

29 const base::char16 kSpace[] = {L' ', L'\0'};	30 const base::char16 kSpace[] = {L' ', L'\0'};

30 const base::char16 kUS[] = {L'U', L'S', L'\0'};

31	31

32 bool ContainsNewline(base::StringPiece16 text) {	32 bool ContainsNewline(base::StringPiece16 text) {

33 return text.find('\n') != base::StringPiece16::npos;	33 return text.find('\n') != base::StringPiece16::npos;

34 }	34 }

35	35

36 std::ostream& operator<<(std::ostream& os,	36 std::ostream& operator<<(std::ostream& os,

37 const ::i18n::phonenumbers::PhoneNumber& n) {	37 const ::i18n::phonenumbers::PhoneNumber& n) {

38 os << "country_code: " << n.country_code() << " "	38 os << "country_code: " << n.country_code() << " "

39 << "national_number: " << n.national_number();	39 << "national_number: " << n.national_number();

40 if (n.has_extension())	40 if (n.has_extension())

(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
682 const base::string16& zip1 = NormalizeForComparison(	682 const base::string16& zip1 = NormalizeForComparison(

683 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);	683 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

684 const base::string16& zip2 = NormalizeForComparison(	684 const base::string16& zip2 = NormalizeForComparison(

685 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);	685 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE);

686 if (!zip1.empty() && !zip2.empty() &&	686 if (!zip1.empty() && !zip2.empty() &&

687 zip1.find(zip2) == base::string16::npos &&	687 zip1.find(zip2) == base::string16::npos &&

688 zip2.find(zip1) == base::string16::npos) {	688 zip2.find(zip1) == base::string16::npos) {

689 return false;	689 return false;

690 }	690 }

691	691

	692 AddressRewriter rewriter =

	693 AddressRewriter::ForCountryCode(country1.empty() ? country2 : country1);

	694

692 // State	695 // State

693 // ------	696 // ------

694 // Heuristic: States are mergeable if one is a (possibly empty) bag of words	697 // Heuristic: States are mergeable if one is a (possibly empty) bag of words

695 // subset of the other.	698 // subset of the other.

696 //	699 //

697 // TODO(rogerm): If the match is between non-empty zip codes then we can infer	700 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

698 // that the two state strings are intended to have the same meaning. This	701 // that the two state strings are intended to have the same meaning. This

699 // handles the cases where we have invalid or poorly formed data in one of the	702 // handles the cases where we have invalid or poorly formed data in one of the

700 // state values (like "Select one", or "CA - California").	703 // state values (like "Select one", or "CA - California").

701 const AutofillType kState(ADDRESS_HOME_STATE);	704 const AutofillType kState(ADDRESS_HOME_STATE);

702 const base::string16& state1 =	705 const base::string16& state1 =

703 NormalizeForComparison(p1.GetInfo(kState, app_locale_));	706 rewriter.Rewrite(NormalizeForComparison(p1.GetInfo(kState, app_locale_)));

704 const base::string16& state2 =	707 const base::string16& state2 =

705 NormalizeForComparison(p2.GetInfo(kState, app_locale_));	708 rewriter.Rewrite(NormalizeForComparison(p2.GetInfo(kState, app_locale_)));

706 if (!IsMatchingState(GetNonEmptyOf(p1, p2, kCountryCode), state1, state2) &&	709 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) {

707 CompareTokens(state1, state2) == DIFFERENT_TOKENS) {

708 return false;	710 return false;

709 }	711 }

710	712

711 // City	713 // City

712 // ------	714 // ------

713 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words	715 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words

714 // subset of the other.	716 // subset of the other.

715 //	717 //

716 // TODO(rogerm): If the match is between non-empty zip codes then we can infer	718 // TODO(rogerm): If the match is between non-empty zip codes then we can infer

717 // that the two city strings are intended to have the same meaning. This	719 // that the two city strings are intended to have the same meaning. This

718 // handles the cases where we have a city vs one of its suburbs.	720 // handles the cases where we have a city vs one of its suburbs.

719 const base::string16& city1 = NormalizeForComparison(	721 const base::string16& city1 = rewriter.Rewrite(NormalizeForComparison(

720 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	722 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)));

721 const base::string16& city2 = NormalizeForComparison(	723 const base::string16& city2 = rewriter.Rewrite(NormalizeForComparison(

722 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_));	724 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)));

723 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {	725 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) {

724 return false;	726 return false;

725 }	727 }

726	728

727 // Address	729 // Address

728 // --------	730 // --------

729 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag	731 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag

730 // of words subset of the other.	732 // of words subset of the other.

731 const base::string16& address1 = NormalizeForComparison(	733 const base::string16& address1 = rewriter.Rewrite(NormalizeForComparison(

732 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	734 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)));

733 const base::string16& address2 = NormalizeForComparison(	735 const base::string16& address2 = rewriter.Rewrite(NormalizeForComparison(

734 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_));	736 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)));

735 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {	737 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) {

736 return false;	738 return false;

737 }	739 }

738	740

739 return true;	741 return true;

740 }	742 }

741	743

742 bool AutofillProfileComparator::IsMatchingState(

743 const base::string16& country_code,

744 const base::string16& state1,

745 const base::string16& state2) const {

746 if (state1 == state2)

747 return true;

748

749 if (country_code != kUS)

750 return false;

751

752 // TODO(rogerm): Generalize this to all locales using string equivalence rules.

753 base::string16 name, abbreviation;

754 autofill::state_names::GetNameAndAbbreviation(state1, &name, &abbreviation);

755 if (abbreviation.empty()) {

756 // state1 wasn't recognized. There's no need to compare it to state2

757 return false;

758 }

759

760 return state2 == name || state2 == abbreviation;

761 }

762

763 } // namespace autofill	744 } // namespace autofill

OLD	NEW