Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Side by Side Diff: components/autofill/core/browser/autofill_profile.cc

Issue 2013063002: Remove diacritics when normalizing autofill profile strings for comparison. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Simplify normalization loop and expand test cases. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/core/browser/autofill_profile.h" 5 #include "components/autofill/core/browser/autofill_profile.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <map> 9 #include <map>
10 #include <memory> 10 #include <memory>
(...skipping 18 matching lines...) Expand all
29 #include "components/autofill/core/browser/autofill_type.h" 29 #include "components/autofill/core/browser/autofill_type.h"
30 #include "components/autofill/core/browser/contact_info.h" 30 #include "components/autofill/core/browser/contact_info.h"
31 #include "components/autofill/core/browser/phone_number.h" 31 #include "components/autofill/core/browser/phone_number.h"
32 #include "components/autofill/core/browser/phone_number_i18n.h" 32 #include "components/autofill/core/browser/phone_number_i18n.h"
33 #include "components/autofill/core/browser/state_names.h" 33 #include "components/autofill/core/browser/state_names.h"
34 #include "components/autofill/core/browser/validation.h" 34 #include "components/autofill/core/browser/validation.h"
35 #include "components/autofill/core/common/autofill_l10n_util.h" 35 #include "components/autofill/core/common/autofill_l10n_util.h"
36 #include "components/autofill/core/common/form_field_data.h" 36 #include "components/autofill/core/common/form_field_data.h"
37 #include "grit/components_strings.h" 37 #include "grit/components_strings.h"
38 #include "third_party/icu/source/common/unicode/uchar.h" 38 #include "third_party/icu/source/common/unicode/uchar.h"
39 #include "third_party/icu/source/common/unicode/utypes.h"
40 #include "third_party/icu/source/i18n/unicode/translit.h"
39 #include "third_party/libaddressinput/chromium/addressinput_util.h" 41 #include "third_party/libaddressinput/chromium/addressinput_util.h"
40 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h" 42 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
41 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_formatter.h" 43 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_formatter.h"
42 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_metadata.h" 44 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_metadata.h"
43 #include "ui/base/l10n/l10n_util.h" 45 #include "ui/base/l10n/l10n_util.h"
44 46
45 using base::ASCIIToUTF16; 47 using base::ASCIIToUTF16;
46 using base::UTF16ToUTF8; 48 using base::UTF16ToUTF8;
47 using i18n::addressinput::AddressData; 49 using i18n::addressinput::AddressData;
48 using i18n::addressinput::AddressField; 50 using i18n::addressinput::AddressField;
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 bool operator()(const base::string16& phone) { 230 bool operator()(const base::string16& phone) {
229 return i18n::PhoneNumbersMatch(phone, phone_, country_code_, app_locale_); 231 return i18n::PhoneNumbersMatch(phone, phone_, country_code_, app_locale_);
230 } 232 }
231 233
232 private: 234 private:
233 base::string16 phone_; 235 base::string16 phone_;
234 std::string country_code_; 236 std::string country_code_;
235 std::string app_locale_; 237 std::string app_locale_;
236 }; 238 };
237 239
240 base::string16 NormalizeForComparison(const base::string16& text) {
241 using icu::UnicodeString;
242 using icu::Transliterator;
243
244 // Use ICU transliteration to remove diacritics and fold case.
245 // See http://userguide.icu-project.org/transforms/general
246 UErrorCode status = U_ZERO_ERROR;
247 std::unique_ptr<Transliterator> transliterator(Transliterator::createInstance(
248 "NFD; [:Nonspacing Mark:] Remove; Lower; NFC", UTRANS_FORWARD, status));
249 if (U_FAILURE(status) || transliterator == nullptr) {
250 LOG(ERROR) << "Failed to create ICU Transliterator: "
251 << u_errorName(status);
252 return text;
253 }
254
255 UnicodeString value = UnicodeString(text.data(), text.length());
256 transliterator->transliterate(value);
257
258 return base::string16(value.getBuffer(), value.length());
259 }
260
238 } // namespace 261 } // namespace
239 262
240 AutofillProfile::AutofillProfile(const std::string& guid, 263 AutofillProfile::AutofillProfile(const std::string& guid,
241 const std::string& origin) 264 const std::string& origin)
242 : AutofillDataModel(guid, origin), 265 : AutofillDataModel(guid, origin),
243 record_type_(LOCAL_PROFILE), 266 record_type_(LOCAL_PROFILE),
244 phone_number_(this) { 267 phone_number_(this) {
245 } 268 }
246 269
247 AutofillProfile::AutofillProfile(RecordType type, const std::string& server_id) 270 AutofillProfile::AutofillProfile(RecordType type, const std::string& server_id)
(...skipping 539 matching lines...) Expand 10 before | Expand all | Expand 10 after
787 (base::Time::Now() - use_date()).InDays()); 810 (base::Time::Now() - use_date()).InDays());
788 RecordUse(); 811 RecordUse();
789 } 812 }
790 813
791 // static 814 // static
792 base::string16 AutofillProfile::CanonicalizeProfileString( 815 base::string16 AutofillProfile::CanonicalizeProfileString(
793 const base::string16& str) { 816 const base::string16& str) {
794 base::string16 ret; 817 base::string16 ret;
795 ret.reserve(str.size()); 818 ret.reserve(str.size());
796 819
797 bool previous_was_whitespace = false; 820 // This algorithm is not designed to be perfect, we could get arbitrarily
821 // fancy here trying to canonicalize address lines. Instead, this is designed
822 // to handle common cases for all types of data (addresses and names) without
823 // the need of domain-specific logic.
824 //
825 // 1. Convert punctuation to spaces and normalize all whitespace to spaces.
826 // This will convert "Mid-Island Plz." -> "Mid Island Plz " (the trailing
827 // space will be trimmed off outside of the end of the loop).
828 //
829 // 2. Collapse consecutive punctuation/whitespace characters to a single
830 // space. We pretend the string has already started with whitespace in
831 // order to trim leading spaces.
832 //
833 // 3. Remove diacritics (accents and other non-spacing marks) and perform
834 // case folding to lower-case.
798 835
799 // This algorithm isn't designed to be perfect, we could get arbitrarily 836 bool previous_was_whitespace = true;
800 // fancy here trying to canonicalize address lines. Instead, this is designed 837 for (base::i18n::UTF16CharIterator iter(&str); !iter.end(); iter.Advance()) {
801 // to handle common cases for all types of data (addresses and names)
802 // without the need of domain-specific logic.
803 base::i18n::UTF16CharIterator iter(&str);
804 while (!iter.end()) {
805 switch (u_charType(iter.get())) { 838 switch (u_charType(iter.get())) {
839 // Punctuation
806 case U_DASH_PUNCTUATION: 840 case U_DASH_PUNCTUATION:
807 case U_START_PUNCTUATION: 841 case U_START_PUNCTUATION:
808 case U_END_PUNCTUATION: 842 case U_END_PUNCTUATION:
809 case U_CONNECTOR_PUNCTUATION: 843 case U_CONNECTOR_PUNCTUATION:
810 case U_OTHER_PUNCTUATION: 844 case U_OTHER_PUNCTUATION:
811 // Convert punctuation to spaces. This will convert "Mid-Island Plz." 845 // Whitespace
812 // -> "Mid Island Plz" (the trailing space will be trimmed off at the 846 case U_CONTROL_CHAR: // To escape the '\n' character.
813 // end of the loop). 847 case U_SPACE_SEPARATOR:
848 case U_LINE_SEPARATOR:
849 case U_PARAGRAPH_SEPARATOR:
814 if (!previous_was_whitespace) { 850 if (!previous_was_whitespace) {
815 ret.push_back(' '); 851 ret.push_back(' ');
816 previous_was_whitespace = true; 852 previous_was_whitespace = true;
817 } 853 }
818 break; 854 break;
819 855
820 case U_CONTROL_CHAR: // To escape the '\n' character.
821 case U_SPACE_SEPARATOR:
822 case U_LINE_SEPARATOR:
823 case U_PARAGRAPH_SEPARATOR:
824 // Convert sequences of spaces to single spaces.
825 if (!previous_was_whitespace) {
826 ret.push_back(' ');
827 previous_was_whitespace = true;
828 }
829 break;
830
831 case U_UPPERCASE_LETTER:
832 case U_TITLECASE_LETTER:
833 previous_was_whitespace = false;
834 base::WriteUnicodeCharacter(u_tolower(iter.get()), &ret);
835 break;
836
837 default: 856 default:
838 previous_was_whitespace = false; 857 previous_was_whitespace = false;
839 base::WriteUnicodeCharacter(iter.get(), &ret); 858 base::WriteUnicodeCharacter(iter.get(), &ret);
840 break; 859 break;
841 } 860 }
842 iter.Advance();
843 } 861 }
844 862
845 // Trim off trailing whitespace if we left one. 863 // Trim off trailing whitespace if we left one.
846 if (previous_was_whitespace) 864 if (previous_was_whitespace && !ret.empty())
847 ret.resize(ret.size() - 1); 865 ret.resize(ret.size() - 1);
848 866
849 return ret; 867 // Remove diacritics and perform case folding.
868 return NormalizeForComparison(ret);
850 } 869 }
851 870
852 // static 871 // static
853 bool AutofillProfile::AreProfileStringsSimilar(const base::string16& a, 872 bool AutofillProfile::AreProfileStringsSimilar(const base::string16& a,
854 const base::string16& b) { 873 const base::string16& b) {
855 return CanonicalizeProfileString(a) == CanonicalizeProfileString(b); 874 return CanonicalizeProfileString(a) == CanonicalizeProfileString(b);
856 } 875 }
857 876
858 void AutofillProfile::GetSupportedTypes( 877 void AutofillProfile::GetSupportedTypes(
859 ServerFieldTypeSet* supported_types) const { 878 ServerFieldTypeSet* supported_types) const {
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after
1084 << " " << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_CITY)) << " " 1103 << " " << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_CITY)) << " "
1085 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_STATE)) << " " 1104 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_STATE)) << " "
1086 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_ZIP)) << " " 1105 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_ZIP)) << " "
1087 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_SORTING_CODE)) << " " 1106 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_SORTING_CODE)) << " "
1088 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_COUNTRY)) << " " 1107 << UTF16ToUTF8(profile.GetRawInfo(ADDRESS_HOME_COUNTRY)) << " "
1089 << profile.language_code() << " " 1108 << profile.language_code() << " "
1090 << UTF16ToUTF8(profile.GetRawInfo(PHONE_HOME_WHOLE_NUMBER)); 1109 << UTF16ToUTF8(profile.GetRawInfo(PHONE_HOME_WHOLE_NUMBER));
1091 } 1110 }
1092 1111
1093 } // namespace autofill 1112 } // namespace autofill
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698