Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" | 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/i18n/case_conversion.h" | |
| 10 #include "base/i18n/char_iterator.h" | 11 #include "base/i18n/char_iterator.h" |
| 12 #include "base/strings/string_piece.h" | |
| 11 #include "base/strings/string_split.h" | 13 #include "base/strings/string_split.h" |
| 12 #include "base/strings/string_util.h" | 14 #include "base/strings/string_util.h" |
| 13 #include "base/strings/utf_string_conversion_utils.h" | 15 #include "base/strings/utf_string_conversion_utils.h" |
| 14 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
| 15 #include "components/autofill/core/browser/autofill_data_util.h" | 17 #include "components/autofill/core/browser/autofill_data_util.h" |
| 16 #include "third_party/libphonenumber/phonenumber_api.h" | 18 #include "third_party/libphonenumber/phonenumber_api.h" |
| 17 | 19 |
| 18 namespace autofill { | 20 namespace autofill { |
| 19 namespace { | 21 namespace { |
| 20 | 22 |
| 21 const base::char16 kSpace[] = {L' ', L'\0'}; | 23 const base::char16 kSpace[] = {L' ', L'\0'}; |
| 22 | 24 |
| 25 bool ContainsNewline(base::StringPiece16 text) { | |
| 26 return text.find('\n') != base::StringPiece16::npos; | |
| 27 } | |
| 28 | |
| 23 } // namespace | 29 } // namespace |
| 24 | 30 |
| 25 AutofillProfileComparator::AutofillProfileComparator( | 31 AutofillProfileComparator::AutofillProfileComparator( |
| 26 const base::StringPiece& app_locale) | 32 const base::StringPiece& app_locale) |
| 27 : app_locale_(app_locale.data(), app_locale.size()) { | 33 : app_locale_(app_locale.data(), app_locale.size()) { |
| 28 // Use ICU transliteration to remove diacritics and fold case. | 34 // Use ICU transliteration to remove diacritics and fold case. |
| 29 // See http://userguide.icu-project.org/transforms/general | 35 // See http://userguide.icu-project.org/transforms/general |
| 30 UErrorCode status = U_ZERO_ERROR; | 36 UErrorCode status = U_ZERO_ERROR; |
| 31 std::unique_ptr<icu::Transliterator> transliterator( | 37 std::unique_ptr<icu::Transliterator> transliterator( |
| 32 icu::Transliterator::createInstance( | 38 icu::Transliterator::createInstance( |
| (...skipping 73 matching lines...) | |
| 106 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1, | 112 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1, |
| 107 const AutofillProfile& p2) const { | 113 const AutofillProfile& p2) const { |
| 108 // Sorted in order of relative expense of the tests to fail early and cheaply | 114 // Sorted in order of relative expense of the tests to fail early and cheaply |
| 109 // if possible. | 115 // if possible. |
| 110 return HaveMergeableEmailAddresses(p1, p2) && | 116 return HaveMergeableEmailAddresses(p1, p2) && |
| 111 HaveMergeableCompanyNames(p1, p2) && | 117 HaveMergeableCompanyNames(p1, p2) && |
| 112 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) && | 118 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) && |
| 113 HaveMergeableAddresses(p1, p2); | 119 HaveMergeableAddresses(p1, p2); |
| 114 } | 120 } |
| 115 | 121 |
| 122 bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1, | |
| 123 const AutofillProfile& p2, | |
| 124 NameInfo* name_info) const { | |
| 125 DCHECK(HaveMergeableNames(p1, p2)); | |
| 126 | |
| 127 const AutofillType kFullName(NAME_FULL); | |
| 128 const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_); | |
| 129 const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_); | |
| 130 const base::string16& normalized_full_name_1 = | |
| 131 NormalizeForComparison(full_name_1); | |
| 132 const base::string16& normalized_full_name_2 = | |
| 133 NormalizeForComparison(full_name_2); | |
| 134 | |
| 135 const base::string16* best_name = nullptr; | |
| 136 if (normalized_full_name_1.empty()) { | |
| 137 // p1 has no name, so use the name from p2. | |
| 138 best_name = &full_name_2; | |
| 139 } else if (normalized_full_name_2.empty()) { | |
| 140 // p2 has no name, so use the name from p1. | |
| 141 best_name = &full_name_1; | |
| 142 } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) { | |
| 143 // full_name_2 is a variant of full_name_1. | |
| 144 best_name = &full_name_1; | |
| 145 } else { | |
| 146 // If the assertion that p1 and p2 have mergeable names is true, then | |
| 147 // full_name_1 must be a name variant of full_name_2. | |
| 148 best_name = &full_name_2; | |
| 149 } | |
| 150 | |
| 151 name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_); | |
| 152 return true; | |
| 153 } | |
| 154 | |
| 155 bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1, | |
| 156 const AutofillProfile& p2, | |
| 157 Address* address) const { | |
| 158 DCHECK(HaveMergeableAddresses(p1, p2)); | |
| 159 | |
| 160 // One of the countries is empty or they are the same modulo case, so we just | |
| 161 // have to find the non-empty one, if any. | |
| 162 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); | |
| 163 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); | |
| 164 address->SetInfo( | |
| 165 kCountryCode, | |
| 166 base::i18n::ToUpper( | |
| 167 country1.empty() ? country1 : p2.GetInfo(kCountryCode, app_locale_)), | |
| 168 app_locale_); | |
| 169 | |
| 170 // One of the zip codes is empty, they are the same, or one is a substring | |
| 171 // of the other. So, we have to find the longest one. | |
| 172 const AutofillType kZipCode(ADDRESS_HOME_ZIP); | |
| 173 const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_); | |
| 174 const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_); | |
| 175 address->SetInfo(kZipCode, (zip1.size() > zip2.size() ? zip1 : zip2), | |
| 176 app_locale_); | |
| 177 | |
| 178 // One of the states is empty or one of the states has a subset of tokens from | |
| 179 // the other. Pick the non-empty state that is shorter. This is usually the | |
| 180 // abbreviated one. | |
| 181 const AutofillType kState(ADDRESS_HOME_STATE); | |
| 182 const base::string16& state1 = p1.GetInfo(kState, app_locale_); | |
| 183 const base::string16& state2 = p2.GetInfo(kState, app_locale_); | |
| 184 if (state1.empty()) { | |
| 185 address->SetInfo(kState, state2, app_locale_); | |
| 186 } else if (state2.empty()) { | |
| 187 address->SetInfo(kState, state1, app_locale_); | |
| 188 } else { | |
| 189 address->SetInfo(kState, (state1.size() < state2.size() ? state1 : state2), | |
| 190 app_locale_); | |
| 191 } | |
| 192 | |
| 193 // One of the cities is empty or one of the cities has a subset of tokens from | |
| 194 // the other. Pick the non-empty city that is shorter. This is usually the | |
| 195 // abbreviated one. | |
| 196 const AutofillType kCity(ADDRESS_HOME_STATE); | |
| 197 const base::string16& city1 = p1.GetInfo(kCity, app_locale_); | |
| 198 const base::string16& city2 = p2.GetInfo(kCity, app_locale_); | |
| 199 if (city1.empty()) { | |
| 200 address->SetInfo(kCity, city2, app_locale_); | |
| 201 } else if (city2.empty()) { | |
| 202 address->SetInfo(kCity, city1, app_locale_); | |
| 203 } else { | |
| 204 address->SetInfo(kCity, (city1.size() < city2.size() ? city1 : city2), | |
| 205 app_locale_); | |
| 206 } | |
| 207 | |
| 208 // One of the addresses is empty or one of the addresses has a subset of | |
| 209 // tokens from the other. Pick the non-empty one that carries more detail: | |
| 210 // multi-line over single-line, then more unique tokens, then longer. | |
| 211 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); | |
| 212 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); | |
| 213 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); | |
| 214 // If one of the addresses is empty then use the other. | |
| 215 if (address1.empty()) { | |
| 216 address->SetInfo(kStreetAddress, address2, app_locale_); | |
| 217 } else if (address2.empty()) { | |
| 218 address->SetInfo(kStreetAddress, address1, app_locale_); | |
| 219 } else { | |
| 220 // Prefer the multi-line address if one is multi-line and the other isn't. | |
| 221 bool address1_multiline = ContainsNewline(address1); | |
| 222 bool address2_multiline = ContainsNewline(address2); | |
| 223 if (address1_multiline && !address2_multiline) { | |
| 224 address->SetInfo(kStreetAddress, address1, app_locale_); | |
| 225 } else if (address2_multiline && !address1_multiline) { | |
| 226 address->SetInfo(kStreetAddress, address2, app_locale_); | |
| 227 } else { | |
| 228 // Prefer the one with more tokens if they're both single-line or both | |
| 229 // multi-line addresses. | |
| 230 int result = CompareTokens(NormalizeForComparison(address1), | |
| 231 NormalizeForComparison(address2)); | |
| 232 switch (result) { | |
| 233 case 0: | |
|
Mathieu
2016/06/21 18:55:07
use enum?
Roger McFarlane (Chromium)
2016/06/23 18:27:38
Done.
| |
| 234 // They have the same set of unique tokens. Let's pick the one that's | |
| 235 // longer. | |
| 236 address->SetInfo( | |
| 237 kStreetAddress, | |
| 238 (address1.size() > address2.size() ? address1 : address2), | |
| 239 app_locale_); | |
| 240 break; | |
| 241 case 1: | |
| 242 // address1 has more unique tokens than address2. | |
| 243 address->SetInfo(kStreetAddress, address1, app_locale_); | |
| 244 break; | |
| 245 case 2: | |
| 246 // address2 has more unique tokens than address1. | |
| 247 address->SetInfo(kStreetAddress, address1, app_locale_); | |
| 248 break; | |
| 249 default: | |
| 250 // The addresses aren't mergeable and we shouldn't be doing any of | |
| 251 // this. | |
| 252 NOTREACHED(); | |
| 253 return false; | |
| 254 } | |
| 255 } | |
| 256 } | |
| 257 return true; | |
| 258 } | |
| 259 | |
| 116 // static | 260 // static |
| 117 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( | 261 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( |
| 118 base::StringPiece16 s) { | 262 base::StringPiece16 s) { |
| 119 std::vector<base::StringPiece16> tokens = base::SplitStringPiece( | 263 std::vector<base::StringPiece16> tokens = base::SplitStringPiece( |
| 120 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 264 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
| 121 return std::set<base::StringPiece16>(tokens.begin(), tokens.end()); | 265 return std::set<base::StringPiece16>(tokens.begin(), tokens.end()); |
| 122 } | 266 } |
| 123 | 267 |
| 124 // static | 268 // static |
| 125 bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1, | 269 AutofillProfileComparator::CompareTokensResult |
| 126 base::StringPiece16 s2) { | 270 AutofillProfileComparator::CompareTokens(base::StringPiece16 s1, |
| 271 base::StringPiece16 s2) { | |
| 272 // Note: std::includes() expects the items in each range to be in sorted order, | |
| 273 // hence the use of std::set<> instead of std::unordered_set<>. | |
| 127 std::set<base::StringPiece16> t1 = UniqueTokens(s1); | 274 std::set<base::StringPiece16> t1 = UniqueTokens(s1); |
| 128 std::set<base::StringPiece16> t2 = UniqueTokens(s2); | 275 std::set<base::StringPiece16> t2 = UniqueTokens(s2); |
| 129 | 276 |
| 130 // Note: std::include() expects the items in each range to be in sorted order, | 277 // Does s1 contain all of the tokens in s2? As a special case, return |
| 131 // hence the use of std::set<> instead of std::unordered_set<>. | 278 // SAME_TOKENS if the two sets are exactly the same. |
| 132 return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) || | 279 if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end())) |
| 133 std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()); | 280 return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2; |
| 281 | |
| 282 // Does s2 contain all of the tokens in s1? | |
| 283 if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end())) | |
| 284 return S2_CONTAINS_S1; | |
| 285 | |
| 286 // Neither string contains all of the tokens from the other. | |
| 287 return DIFFERENT_TOKENS; | |
| 134 } | 288 } |
| 135 | 289 |
| 136 // static | 290 // static |
| 137 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants( | 291 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants( |
| 138 const base::string16& name_part) { | 292 const base::string16& name_part) { |
| 139 const size_t kMaxSupportedSubNames = 8; | 293 const size_t kMaxSupportedSubNames = 8; |
| 140 | 294 |
| 141 std::vector<base::string16> sub_names = base::SplitString( | 295 std::vector<base::string16> sub_names = base::SplitString( |
| 142 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 296 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
| 143 | 297 |
| 144 // Limit the number of sub-names we support (to constrain memory usage). | 298 // Limit the number of sub-names we support (to constrain memory usage). |
| 145 if (sub_names.size() > kMaxSupportedSubNames) | 299 if (sub_names.size() > kMaxSupportedSubNames) |
| 146 return {name_part}; | 300 return {name_part}; |
| 147 | 301 |
| 148 // Start with the empty string as a variant. | 302 // Start with the empty string as a variant. |
| 149 std::set<base::string16> variants = {base::EmptyString16()}; | 303 std::set<base::string16> variants = {base::EmptyString16()}; |
| 150 | 304 |
| 151 // For each sub-name, add a variant of all the already existing variants that | 305 // For each sub-name, add a variant of all the already existing variants that |
| 152 // appends this sub-name and one that appends the initial of this sub-name. | 306 // appends this sub-name and one that appends the initial of this sub-name. |
| 153 // Duplicates will be discarded when they're added to the variants set. | 307 // Duplicates will be discarded when they're added to the variants set. |
| 154 for (const base::string16& sub_name : sub_names) { | 308 for (const base::string16& sub_name : sub_names) { |
| 155 if (sub_name.empty()) continue; | 309 if (sub_name.empty()) |
| 310 continue; | |
| 156 std::vector<base::string16> new_variants; | 311 std::vector<base::string16> new_variants; |
| 157 for (const base::string16& variant : variants) { | 312 for (const base::string16& variant : variants) { |
| 158 new_variants.push_back(base::CollapseWhitespace( | 313 new_variants.push_back(base::CollapseWhitespace( |
| 159 base::JoinString({variant, sub_name}, kSpace), true)); | 314 base::JoinString({variant, sub_name}, kSpace), true)); |
| 160 new_variants.push_back(base::CollapseWhitespace( | 315 new_variants.push_back(base::CollapseWhitespace( |
| 161 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true)); | 316 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true)); |
| 162 } | 317 } |
| 163 variants.insert(new_variants.begin(), new_variants.end()); | 318 variants.insert(new_variants.begin(), new_variants.end()); |
| 164 } | 319 } |
| 165 | 320 |
| 166 // As a common case, also add the variant that just concatenates all of the | 321 // As a common case, also add the variant that just concatenates all of the |
| 167 // initials. | 322 // initials. |
| 168 base::string16 initials; | 323 base::string16 initials; |
| 169 for (const base::string16& sub_name : sub_names) { | 324 for (const base::string16& sub_name : sub_names) { |
| 170 if (sub_name.empty()) continue; | 325 if (sub_name.empty()) |
| 326 continue; | |
| 171 initials.push_back(sub_name[0]); | 327 initials.push_back(sub_name[0]); |
| 172 } | 328 } |
| 173 variants.insert(initials); | 329 variants.insert(initials); |
| 174 | 330 |
| 175 // And, we're done. | 331 // And, we're done. |
| 176 return variants; | 332 return variants; |
| 177 } | 333 } |
| 178 | 334 |
| 179 bool AutofillProfileComparator::IsNameVariantOf( | 335 bool AutofillProfileComparator::IsNameVariantOf( |
| 180 const base::string16& full_name_1, | 336 const base::string16& full_name_1, |
| (...skipping 65 matching lines...) | |
| 246 } | 402 } |
| 247 | 403 |
| 248 bool AutofillProfileComparator::HaveMergeableCompanyNames( | 404 bool AutofillProfileComparator::HaveMergeableCompanyNames( |
| 249 const AutofillProfile& p1, | 405 const AutofillProfile& p1, |
| 250 const AutofillProfile& p2) const { | 406 const AutofillProfile& p2) const { |
| 251 const base::string16& company_name_1 = NormalizeForComparison( | 407 const base::string16& company_name_1 = NormalizeForComparison( |
| 252 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); | 408 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); |
| 253 const base::string16& company_name_2 = NormalizeForComparison( | 409 const base::string16& company_name_2 = NormalizeForComparison( |
| 254 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); | 410 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); |
| 255 return company_name_1.empty() || company_name_2.empty() || | 411 return company_name_1.empty() || company_name_2.empty() || |
| 256 HaveSameTokens(company_name_1, company_name_2); | 412 CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS; |
| 257 } | 413 } |
| 258 | 414 |
| 259 bool AutofillProfileComparator::HaveMergeablePhoneNumbers( | 415 bool AutofillProfileComparator::HaveMergeablePhoneNumbers( |
| 260 const AutofillProfile& p1, | 416 const AutofillProfile& p1, |
| 261 const AutofillProfile& p2) const { | 417 const AutofillProfile& p2) const { |
| 262 // We work with the raw phone numbers to avoid losing any helpful information | 418 // We work with the raw phone numbers to avoid losing any helpful information |
| 263 // as we parse. | 419 // as we parse. |
| 264 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); | 420 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); |
| 265 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); | 421 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); |
| 266 | 422 |
| (...skipping 26 matching lines...) | |
| 293 | 449 |
| 294 NOTREACHED(); | 450 NOTREACHED(); |
| 295 return false; | 451 return false; |
| 296 } | 452 } |
| 297 | 453 |
| 298 bool AutofillProfileComparator::HaveMergeableAddresses( | 454 bool AutofillProfileComparator::HaveMergeableAddresses( |
| 299 const AutofillProfile& p1, | 455 const AutofillProfile& p1, |
| 300 const AutofillProfile& p2) const { | 456 const AutofillProfile& p2) const { |
| 301 // If the addresses are not in the same country, then they're not the same. If | 457 // If the addresses are not in the same country, then they're not the same. If |
| 302 // one of the address countries is unknown/invalid the comparison continues. | 458 // one of the address countries is unknown/invalid the comparison continues. |
| 303 const base::string16& country1 = p1.GetInfo( | 459 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| 304 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_); | 460 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); |
| 305 const base::string16& country2 = p2.GetInfo( | 461 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); |
| 306 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_); | |
| 307 if (!country1.empty() && !country2.empty() && | 462 if (!country1.empty() && !country2.empty() && |
| 308 !case_insensitive_compare_.StringsEqual(country1, country2)) { | 463 !case_insensitive_compare_.StringsEqual(country1, country2)) { |
| 309 return false; | 464 return false; |
| 310 } | 465 } |
| 311 | 466 |
| 312 // TODO(rogerm): Lookup the normalization rules for the (common) country of | 467 // TODO(rogerm): Lookup the normalization rules for the (common) country of |
| 313 // the address. The rules should be applied post NormalizeForComparison to | 468 // the address. The rules should be applied post NormalizeForComparison to |
| 314 // the state, city, and address bag of words comparisons. | 469 // the state, city, and address bag of words comparisons. |
| 315 | 470 |
| 316 // Zip | 471 // Zip |
| 317 // ---- | 472 // ---- |
| 318 // If the addresses are definitely not in the same zip/area code then we're | 473 // If the addresses are definitely not in the same zip/area code then we're |
| 319 // done. Otherwise, the comparison continues. | 474 // done. Otherwise, the comparison continues. |
| 475 const AutofillType kZipCode(ADDRESS_HOME_ZIP); | |
| 320 const base::string16& zip1 = NormalizeForComparison( | 476 const base::string16& zip1 = NormalizeForComparison( |
| 321 p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), | 477 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 322 DISCARD_WHITESPACE); | |
| 323 const base::string16& zip2 = NormalizeForComparison( | 478 const base::string16& zip2 = NormalizeForComparison( |
| 324 p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), | 479 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 325 DISCARD_WHITESPACE); | |
| 326 if (!zip1.empty() && !zip2.empty() && | 480 if (!zip1.empty() && !zip2.empty() && |
| 327 zip1.find(zip2) == base::string16::npos && | 481 zip1.find(zip2) == base::string16::npos && |
| 328 zip2.find(zip1) == base::string16::npos) { | 482 zip2.find(zip1) == base::string16::npos) { |
| 329 return false; | 483 return false; |
| 330 } | 484 } |
| 331 | 485 |
| 332 // State | 486 // State |
| 333 // ------ | 487 // ------ |
| 334 // Heuristic: If the match is between non-empty zip codes then we can infer | 488 // Heuristic: States are mergeable if one is a (possibly empty) bag of words |
| 489 // subset of the other. | |
| 490 // | |
| 491 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | |
| 335 // that the two state strings are intended to have the same meaning. This | 492 // that the two state strings are intended to have the same meaning. This |
| 336 // handles the cases where we have invalid or poorly formed data in one of the | 493 // handles the cases where we have invalid or poorly formed data in one of the |
| 337 // state values (like "Select one", or "CA - California"). Otherwise, we | 494 // state values (like "Select one", or "CA - California"). |
| 338 // actually have to check if the states map to the the same set of tokens. | 495 const AutofillType kState(ADDRESS_HOME_STATE); |
| 339 const base::string16& state1 = NormalizeForComparison( | 496 const base::string16& state1 = |
| 340 p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); | 497 NormalizeForComparison(p1.GetInfo(kState, app_locale_)); |
| 341 const base::string16& state2 = NormalizeForComparison( | 498 const base::string16& state2 = |
| 342 p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); | 499 NormalizeForComparison(p2.GetInfo(kState, app_locale_)); |
| 343 if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(state1, state2)) { | 500 if (CompareTokens(state1, state2) == DIFFERENT_TOKENS) { |
| 344 return false; | 501 return false; |
| 345 } | 502 } |
| 346 | 503 |
| 347 // City | 504 // City |
| 348 // ------ | 505 // ------ |
| 349 // Heuristic: If the match is between non-empty zip codes then we can infer | 506 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words |
| 507 // subset of the other. | |
| 508 // | |
| 509 // TODO(rogerm): If the match is between non-empty zip codes then we can infer | |
| 350 // that the two city strings are intended to have the same meaning. This | 510 // that the two city strings are intended to have the same meaning. This |
| 351 // handles the cases where we have a city vs one of its suburbs. Otherwise, we | 511 // handles the cases where we have a city vs one of its suburbs. |
| 352 // actually have to check if the cities map to the the same set of tokens. | |
| 353 const base::string16& city1 = NormalizeForComparison( | 512 const base::string16& city1 = NormalizeForComparison( |
| 354 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); | 513 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); |
| 355 const base::string16& city2 = NormalizeForComparison( | 514 const base::string16& city2 = NormalizeForComparison( |
| 356 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); | 515 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); |
| 357 if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(city1, city2)) { | 516 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { |
| 358 return false; | 517 return false; |
| 359 } | 518 } |
| 360 | 519 |
| 361 // Address | 520 // Address |
| 362 // -------- | 521 // -------- |
| 363 // Heuristic: Use bag of words comparison on the post-normalized addresses. | 522 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag |
| 523 // of words subset of the other. | |
| 364 const base::string16& address1 = NormalizeForComparison( | 524 const base::string16& address1 = NormalizeForComparison( |
| 365 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); | 525 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); |
| 366 const base::string16& address2 = NormalizeForComparison( | 526 const base::string16& address2 = NormalizeForComparison( |
| 367 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); | 527 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); |
| 368 if (!HaveSameTokens(address1, address2)) { | 528 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { |
| 369 return false; | 529 return false; |
| 370 } | 530 } |
| 371 | 531 |
| 372 return true; | 532 return true; |
| 373 } | 533 } |
| 374 | 534 |
| 375 } // namespace autofill | 535 } // namespace autofill |
| OLD | NEW |