| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" | 5 #include "components/autofill/core/browser/autofill_profile_comparator.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/i18n/case_conversion.h" |
| 10 #include "base/i18n/char_iterator.h" | 11 #include "base/i18n/char_iterator.h" |
| 12 #include "base/strings/string_piece.h" |
| 11 #include "base/strings/string_split.h" | 13 #include "base/strings/string_split.h" |
| 12 #include "base/strings/string_util.h" | 14 #include "base/strings/string_util.h" |
| 13 #include "base/strings/utf_string_conversion_utils.h" | 15 #include "base/strings/utf_string_conversion_utils.h" |
| 14 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
| 17 #include "components/autofill/core/browser/autofill_country.h" |
| 15 #include "components/autofill/core/browser/autofill_data_util.h" | 18 #include "components/autofill/core/browser/autofill_data_util.h" |
| 19 #include "components/autofill/core/browser/state_names.h" |
| 16 #include "third_party/libphonenumber/phonenumber_api.h" | 20 #include "third_party/libphonenumber/phonenumber_api.h" |
| 17 | 21 |
| 22 using i18n::phonenumbers::PhoneNumberUtil; |
| 23 using base::UTF16ToUTF8; |
| 24 using base::UTF8ToUTF16; |
| 25 |
| 18 namespace autofill { | 26 namespace autofill { |
| 19 namespace { | 27 namespace { |
| 20 | 28 |
| 21 const base::char16 kSpace[] = {L' ', L'\0'}; | 29 const base::char16 kSpace[] = {L' ', L'\0'}; |
| 30 const base::char16 kUS[] = {L'U', L'S', L'\0'}; |
| 31 |
| 32 bool ContainsNewline(base::StringPiece16 text) { |
| 33 return text.find('\n') != base::StringPiece16::npos; |
| 34 } |
| 35 |
| 36 std::ostream& operator<<(std::ostream& os, |
| 37 const ::i18n::phonenumbers::PhoneNumber& n) { |
| 38 os << "country_code: " << n.country_code() << " " |
| 39 << "national_number: " << n.national_number(); |
| 40 if (n.has_extension()) |
| 41 os << " extension: \"" << n.extension() << "\""; |
| 42 if (n.has_italian_leading_zero()) |
| 43 os << " italian_leading_zero: " << n.italian_leading_zero(); |
| 44 if (n.has_number_of_leading_zeros()) |
| 45 os << " number_of_leading_zeros: " << n.number_of_leading_zeros(); |
| 46 if (n.has_raw_input()) |
| 47 os << " raw_input: \"" << n.raw_input() << "\""; |
| 48 return os; |
| 49 } |
| 22 | 50 |
| 23 } // namespace | 51 } // namespace |
| 24 | 52 |
| 25 AutofillProfileComparator::AutofillProfileComparator( | 53 AutofillProfileComparator::AutofillProfileComparator( |
| 26 const base::StringPiece& app_locale) | 54 const base::StringPiece& app_locale) |
| 27 : app_locale_(app_locale.data(), app_locale.size()) { | 55 : app_locale_(app_locale.data(), app_locale.size()) { |
| 28 // Use ICU transliteration to remove diacritics and fold case. | 56 // Use ICU transliteration to remove diacritics and fold case. |
| 29 // See http://userguide.icu-project.org/transforms/general | 57 // See http://userguide.icu-project.org/transforms/general |
| 30 UErrorCode status = U_ZERO_ERROR; | 58 UErrorCode status = U_ZERO_ERROR; |
| 31 std::unique_ptr<icu::Transliterator> transliterator( | 59 std::unique_ptr<icu::Transliterator> transliterator( |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 106 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1, | 134 bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1, |
| 107 const AutofillProfile& p2) const { | 135 const AutofillProfile& p2) const { |
| 108 // Sorted in order of relative expense of the tests to fail early and cheaply | 136 // Sorted in order of relative expense of the tests to fail early and cheaply |
| 109 // if possible. | 137 // if possible. |
| 110 return HaveMergeableEmailAddresses(p1, p2) && | 138 return HaveMergeableEmailAddresses(p1, p2) && |
| 111 HaveMergeableCompanyNames(p1, p2) && | 139 HaveMergeableCompanyNames(p1, p2) && |
| 112 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) && | 140 HaveMergeablePhoneNumbers(p1, p2) && HaveMergeableNames(p1, p2) && |
| 113 HaveMergeableAddresses(p1, p2); | 141 HaveMergeableAddresses(p1, p2); |
| 114 } | 142 } |
| 115 | 143 |
| 144 bool AutofillProfileComparator::MergeNames(const AutofillProfile& p1, |
| 145 const AutofillProfile& p2, |
| 146 NameInfo* name_info) const { |
| 147 DCHECK(HaveMergeableNames(p1, p2)); |
| 148 |
| 149 const AutofillType kFullName(NAME_FULL); |
| 150 const base::string16& full_name_1 = p1.GetInfo(kFullName, app_locale_); |
| 151 const base::string16& full_name_2 = p2.GetInfo(kFullName, app_locale_); |
| 152 const base::string16& normalized_full_name_1 = |
| 153 NormalizeForComparison(full_name_1); |
| 154 const base::string16& normalized_full_name_2 = |
| 155 NormalizeForComparison(full_name_2); |
| 156 |
| 157 const base::string16* best_name = nullptr; |
| 158 if (normalized_full_name_1.empty()) { |
| 159 // p1 has no name, so use the name from p2. |
| 160 best_name = &full_name_2; |
| 161 } else if (normalized_full_name_2.empty()) { |
| 162 // p2 has no name, so use the name from p1. |
| 163 best_name = &full_name_1; |
| 164 } else if (IsNameVariantOf(normalized_full_name_1, normalized_full_name_2)) { |
| 165 // full_name_2 is a variant of full_name_1. |
| 166 best_name = &full_name_1; |
| 167 } else { |
| 168 // If the assertion that p1 and p2 have mergeable names is true, then |
| 169 // full_name_1 must be a name variant of full_name_2; |
| 170 best_name = &full_name_2; |
| 171 } |
| 172 |
| 173 name_info->SetInfo(AutofillType(NAME_FULL), *best_name, app_locale_); |
| 174 return true; |
| 175 } |
| 176 |
| 177 bool AutofillProfileComparator::MergeEmailAddresses( |
| 178 const AutofillProfile& p1, |
| 179 const AutofillProfile& p2, |
| 180 EmailInfo* email_info) const { |
| 181 DCHECK(HaveMergeableEmailAddresses(p1, p2)); |
| 182 |
| 183 const AutofillType kEmailAddress(EMAIL_ADDRESS); |
| 184 const base::string16& e1 = p1.GetInfo(kEmailAddress, app_locale_); |
| 185 const base::string16& e2 = p2.GetInfo(kEmailAddress, app_locale_); |
| 186 const base::string16* best = nullptr; |
| 187 |
| 188 if (e1.empty()) { |
| 189 best = &e2; |
| 190 } else if (e2.empty()) { |
| 191 best = &e1; |
| 192 } else { |
| 193 best = p2.use_date() > p1.use_date() ? &e2 : &e1; |
| 194 } |
| 195 |
| 196 email_info->SetInfo(kEmailAddress, *best, app_locale_); |
| 197 return true; |
| 198 } |
| 199 |
| 200 bool AutofillProfileComparator::MergeCompanyNames( |
| 201 const AutofillProfile& p1, |
| 202 const AutofillProfile& p2, |
| 203 CompanyInfo* company_info) const { |
| 204 const AutofillType kCompanyName(COMPANY_NAME); |
| 205 const base::string16& c1 = p1.GetInfo(kCompanyName, app_locale_); |
| 206 const base::string16& c2 = p2.GetInfo(kCompanyName, app_locale_); |
| 207 const base::string16* best = nullptr; |
| 208 |
| 209 DCHECK(HaveMergeableCompanyNames(p1, p2)) |
| 210 << "Company names are not mergeable: '" << c1 << "' vs '" << c2 << "'"; |
| 211 |
| 212 CompareTokensResult result = |
| 213 CompareTokens(NormalizeForComparison(c1), NormalizeForComparison(c2)); |
| 214 switch (result) { |
| 215 case DIFFERENT_TOKENS: |
| 216 default: |
| 217 NOTREACHED(); |
| 218 return false; |
| 219 case S1_CONTAINS_S2: |
| 220 best = &c1; |
| 221 break; |
| 222 case S2_CONTAINS_S1: |
| 223 best = &c2; |
| 224 break; |
| 225 case SAME_TOKENS: |
| 226 best = p2.use_date() > p1.use_date() ? &c2 : &c1; |
| 227 break; |
| 228 } |
| 229 |
| 230 company_info->SetInfo(kCompanyName, *best, app_locale_); |
| 231 return true; |
| 232 } |
| 233 |
| 234 bool AutofillProfileComparator::MergePhoneNumbers( |
| 235 const AutofillProfile& p1, |
| 236 const AutofillProfile& p2, |
| 237 PhoneNumber* phone_number) const { |
| 238 const ServerFieldType kWholePhoneNumber = PHONE_HOME_WHOLE_NUMBER; |
| 239 const base::string16& s1 = p1.GetRawInfo(kWholePhoneNumber); |
| 240 const base::string16& s2 = p2.GetRawInfo(kWholePhoneNumber); |
| 241 |
| 242 DCHECK(HaveMergeablePhoneNumbers(p1, p2)) |
| 243 << "Phone numbers are not mergeable: '" << s1 << "' vs '" << s2 << "'"; |
| 244 |
| 245 if (s1.empty()) { |
| 246 phone_number->SetRawInfo(kWholePhoneNumber, s2); |
| 247 return true; |
| 248 } |
| 249 |
| 250 if (s2.empty() || s1 == s2) { |
| 251 phone_number->SetRawInfo(kWholePhoneNumber, s1); |
| 252 return true; |
| 253 } |
| 254 |
| 255 // Figure out a country code hint. |
| 256 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| 257 std::string region = UTF16ToUTF8(GetNonEmptyOf(p1, p2, kCountryCode)); |
| 258 if (region.empty()) |
| 259 region = AutofillCountry::CountryCodeForLocale(app_locale_); |
| 260 |
| 261 // Parse the phone numbers. |
| 262 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance(); |
| 263 |
| 264 ::i18n::phonenumbers::PhoneNumber n1; |
| 265 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s1), region, &n1) != |
| 266 PhoneNumberUtil::NO_PARSING_ERROR) { |
| 267 return false; |
| 268 } |
| 269 |
| 270 ::i18n::phonenumbers::PhoneNumber n2; |
| 271 if (phone_util->ParseAndKeepRawInput(UTF16ToUTF8(s2), region, &n2) != |
| 272 PhoneNumberUtil::NO_PARSING_ERROR) { |
| 273 return false; |
| 274 } |
| 275 |
| 276 ::i18n::phonenumbers::PhoneNumber merged_number; |
| 277 DCHECK_EQ(n1.country_code(), n2.country_code()); |
| 278 merged_number.set_country_code(n1.country_code()); |
| 279 merged_number.set_national_number( |
| 280 std::max(n1.national_number(), n2.national_number())); |
| 281 if (n1.has_extension() && !n1.extension().empty()) { |
| 282 merged_number.set_extension(n1.extension()); |
| 283 } else if (n2.has_extension() && !n2.extension().empty()) { |
| 284 merged_number.set_extension(n2.extension()); |
| 285 } |
| 286 if (n1.has_italian_leading_zero() || n2.has_italian_leading_zero()) { |
| 287 merged_number.set_italian_leading_zero(n1.italian_leading_zero() || |
| 288 n2.italian_leading_zero()); |
| 289 } |
| 290 if (n1.has_number_of_leading_zeros() || n2.has_number_of_leading_zeros()) { |
| 291 merged_number.set_number_of_leading_zeros( |
| 292 std::max(n1.number_of_leading_zeros(), n2.number_of_leading_zeros())); |
| 293 } |
| 294 |
| 295 PhoneNumberUtil::PhoneNumberFormat format = |
| 296 region.empty() ? PhoneNumberUtil::NATIONAL |
| 297 : PhoneNumberUtil::INTERNATIONAL; |
| 298 |
| 299 std::string new_number; |
| 300 phone_util->Format(merged_number, format, &new_number); |
| 301 |
| 302 VLOG(1) << "n1 = {" << n1 << "}"; |
| 303 VLOG(1) << "n2 = {" << n2 << "}"; |
| 304 VLOG(1) << "merged_number = {" << merged_number << "}"; |
| 305 VLOG(1) << "new_number = \"" << new_number << "\""; |
| 306 |
| 307 // Check if it's a North American number that's missing the area code. |
| 308 // Libphonenumber doesn't know how to format short numbers; it will still |
| 309 // include the country code prefix. |
| 310 if (merged_number.country_code() == 1 && |
| 311 merged_number.national_number() <= 9999999 && |
| 312 new_number.find("+1") == 0) { |
| 313 size_t offset = 2; // The char just after "+1". |
| 314 while (offset < new_number.size() && |
| 315 base::IsAsciiWhitespace(new_number[offset])) { |
| 316 ++offset; |
| 317 } |
| 318 new_number = new_number.substr(offset); |
| 319 } |
| 320 |
| 321 phone_number->SetRawInfo(kWholePhoneNumber, UTF8ToUTF16(new_number)); |
| 322 |
| 323 return true; |
| 324 } |
| 325 |
| 326 bool AutofillProfileComparator::MergeAddresses(const AutofillProfile& p1, |
| 327 const AutofillProfile& p2, |
| 328 Address* address) const { |
| 329 DCHECK(HaveMergeableAddresses(p1, p2)); |
| 330 |
| 331 // One of the countries is empty or they are the same modulo case, so we just |
| 332 // have to find the non-empty one, if any. |
| 333 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| 334 const base::string16& country_code = |
| 335 base::i18n::ToUpper(GetNonEmptyOf(p1, p2, kCountryCode)); |
| 336 address->SetInfo(kCountryCode, country_code, app_locale_); |
| 337 |
| 338 // One of the zip codes is empty, they are the same, or one is a substring |
| 339 // of the other. We prefer the most recently used zip code. |
| 340 const AutofillType kZipCode(ADDRESS_HOME_ZIP); |
| 341 const base::string16& zip1 = p1.GetInfo(kZipCode, app_locale_); |
| 342 const base::string16& zip2 = p2.GetInfo(kZipCode, app_locale_); |
| 343 if (zip1.empty()) { |
| 344 address->SetInfo(kZipCode, zip2, app_locale_); |
| 345 } else if (zip2.empty()) { |
| 346 address->SetInfo(kZipCode, zip1, app_locale_); |
| 347 } else { |
| 348 address->SetInfo(kZipCode, (p2.use_date() > p1.use_date() ? zip2 : zip1), |
| 349 app_locale_); |
| 350 } |
| 351 |
| 352 // One of the states is empty or one of the states has a subset of tokens from |
| 353 // the other. Pick the non-empty state that is shorter. This is usually the |
| 354 // abbreviated one. |
| 355 const AutofillType kState(ADDRESS_HOME_STATE); |
| 356 const base::string16& state1 = p1.GetInfo(kState, app_locale_); |
| 357 const base::string16& state2 = p2.GetInfo(kState, app_locale_); |
| 358 if (state1.empty()) { |
| 359 address->SetInfo(kState, state2, app_locale_); |
| 360 } else if (state2.empty()) { |
| 361 address->SetInfo(kState, state1, app_locale_); |
| 362 } else { |
| 363 address->SetInfo(kState, (state2.size() < state1.size() ? state2 : state1), |
| 364 app_locale_); |
| 365 } |
| 366 |
| 367 // One of the cities is empty or one of the cities has a subset of tokens from |
| 368 // the other. Pick the city name with more tokens; this is usually the most |
| 369 // explicit one. |
| 370 const AutofillType kCity(ADDRESS_HOME_CITY); |
| 371 const base::string16& city1 = p1.GetInfo(kCity, app_locale_); |
| 372 const base::string16& city2 = p2.GetInfo(kCity, app_locale_); |
| 373 if (city1.empty()) { |
| 374 address->SetInfo(kCity, city2, app_locale_); |
| 375 } else if (city2.empty()) { |
| 376 address->SetInfo(kCity, city1, app_locale_); |
| 377 } else { |
| 378 // Prefer the one with more tokens. |
| 379 CompareTokensResult result = CompareTokens(NormalizeForComparison(city1), |
| 380 NormalizeForComparison(city2)); |
| 381 switch (result) { |
| 382 case SAME_TOKENS: |
| 383 // They have the same set of unique tokens. Let's pick the more recently |
| 384 // used one. |
| 385 address->SetInfo(kCity, (p2.use_date() > p1.use_date() ? city2 : city1), |
| 386 app_locale_); |
| 387 break; |
| 388 case S1_CONTAINS_S2: |
| 389 // city1 has more unique tokens than city2. |
| 390 address->SetInfo(kCity, city1, app_locale_); |
| 391 break; |
| 392 case S2_CONTAINS_S1: |
| 393 // city2 has more unique tokens than city1. |
| 394 address->SetInfo(kCity, city2, app_locale_); |
| 395 break; |
| 396 case DIFFERENT_TOKENS: |
| 397 default: |
| 398 // The addresses aren't mergeable and we shouldn't be doing any of |
| 399 // this. |
| 400 NOTREACHED(); |
| 401 return false; |
| 402 } |
| 403 } |
| 404 |
| 405 // One of the addresses is empty or one of the addresses has a subset of |
| 406 // tokens from the other. Prefer the more verbosely expressed one. |
| 407 const AutofillType kStreetAddress(ADDRESS_HOME_STREET_ADDRESS); |
| 408 const base::string16& address1 = p1.GetInfo(kStreetAddress, app_locale_); |
| 409 const base::string16& address2 = p2.GetInfo(kStreetAddress, app_locale_); |
| 410 // If one of the addresses is empty then use the other. |
| 411 if (address1.empty()) { |
| 412 address->SetInfo(kStreetAddress, address2, app_locale_); |
| 413 } else if (address2.empty()) { |
| 414 address->SetInfo(kStreetAddress, address1, app_locale_); |
| 415 } else { |
| 416 // Prefer the multi-line address if one is multi-line and the other isn't. |
| 417 bool address1_multiline = ContainsNewline(address1); |
| 418 bool address2_multiline = ContainsNewline(address2); |
| 419 if (address1_multiline && !address2_multiline) { |
| 420 address->SetInfo(kStreetAddress, address1, app_locale_); |
| 421 } else if (address2_multiline && !address1_multiline) { |
| 422 address->SetInfo(kStreetAddress, address2, app_locale_); |
| 423 } else { |
| 424 // Prefer the one with more tokens if they're both single-line or both |
| 425 // multi-line addresses. |
| 426 CompareTokensResult result = CompareTokens( |
| 427 NormalizeForComparison(address1), NormalizeForComparison(address2)); |
| 428 switch (result) { |
| 429 case SAME_TOKENS: |
| 430 // They have the same set of unique tokens. Let's pick the more recently |
| 431 // used one. |
| 432 address->SetInfo( |
| 433 kStreetAddress, |
| 434 (p2.use_date() > p1.use_date() ? address2 : address1), |
| 435 app_locale_); |
| 436 break; |
| 437 case S1_CONTAINS_S2: |
| 438 // address1 has more unique tokens than address2. |
| 439 address->SetInfo(kStreetAddress, address1, app_locale_); |
| 440 break; |
| 441 case S2_CONTAINS_S1: |
| 442 // address2 has more unique tokens than address1. |
| 443 address->SetInfo(kStreetAddress, address2, app_locale_); |
| 444 break; |
| 445 case DIFFERENT_TOKENS: |
| 446 default: |
| 447 // The addresses aren't mergeable and we shouldn't be doing any of |
| 448 // this. |
| 449 NOTREACHED(); |
| 450 return false; |
| 451 } |
| 452 } |
| 453 } |
| 454 return true; |
| 455 } |
| 456 |
| 116 // static | 457 // static |
| 117 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( | 458 std::set<base::StringPiece16> AutofillProfileComparator::UniqueTokens( |
| 118 base::StringPiece16 s) { | 459 base::StringPiece16 s) { |
| 119 std::vector<base::StringPiece16> tokens = base::SplitStringPiece( | 460 std::vector<base::StringPiece16> tokens = base::SplitStringPiece( |
| 120 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 461 s, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
| 121 return std::set<base::StringPiece16>(tokens.begin(), tokens.end()); | 462 return std::set<base::StringPiece16>(tokens.begin(), tokens.end()); |
| 122 } | 463 } |
| 123 | 464 |
| 124 // static | 465 // static |
| 125 bool AutofillProfileComparator::HaveSameTokens(base::StringPiece16 s1, | 466 AutofillProfileComparator::CompareTokensResult |
| 126 base::StringPiece16 s2) { | 467 AutofillProfileComparator::CompareTokens(base::StringPiece16 s1, |
| 468 base::StringPiece16 s2) { |
| 469 // Note: std::includes() expects the items in each range to be in sorted order, |
| 470 // hence the use of std::set<> instead of std::unordered_set<>. |
| 127 std::set<base::StringPiece16> t1 = UniqueTokens(s1); | 471 std::set<base::StringPiece16> t1 = UniqueTokens(s1); |
| 128 std::set<base::StringPiece16> t2 = UniqueTokens(s2); | 472 std::set<base::StringPiece16> t2 = UniqueTokens(s2); |
| 129 | 473 |
| 130 // Note: std::include() expects the items in each range to be in sorted order, | 474 // Does s1 contain all of the tokens in s2? As a special case, return |
| 131 // hence the use of std::set<> instead of std::unordered_set<>. | 475 // SAME_TOKENS if the two sets are exactly the same. |
| 132 return std::includes(t1.begin(), t1.end(), t2.begin(), t2.end()) || | 476 if (std::includes(t1.begin(), t1.end(), t2.begin(), t2.end())) |
| 133 std::includes(t2.begin(), t2.end(), t1.begin(), t1.end()); | 477 return t1.size() == t2.size() ? SAME_TOKENS : S1_CONTAINS_S2; |
| 478 |
| 479 // Does s2 contain all of the tokens in s1? |
| 480 if (std::includes(t2.begin(), t2.end(), t1.begin(), t1.end())) |
| 481 return S2_CONTAINS_S1; |
| 482 |
| 483 // Neither string contains all of the tokens from the other. |
| 484 return DIFFERENT_TOKENS; |
| 485 } |
| 486 |
| 487 base::string16 AutofillProfileComparator::GetNonEmptyOf( |
| 488 const AutofillProfile& p1, |
| 489 const AutofillProfile& p2, |
| 490 AutofillType t) const { |
| 491 const base::string16& s1 = p1.GetInfo(t, app_locale_); |
| 492 if (!s1.empty()) |
| 493 return s1; |
| 494 return p2.GetInfo(t, app_locale_); |
| 134 } | 495 } |
| 135 | 496 |
| 136 // static | 497 // static |
| 137 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants( | 498 std::set<base::string16> AutofillProfileComparator::GetNamePartVariants( |
| 138 const base::string16& name_part) { | 499 const base::string16& name_part) { |
| 139 const size_t kMaxSupportedSubNames = 8; | 500 const size_t kMaxSupportedSubNames = 8; |
| 140 | 501 |
| 141 std::vector<base::string16> sub_names = base::SplitString( | 502 std::vector<base::string16> sub_names = base::SplitString( |
| 142 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 503 name_part, kSpace, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
| 143 | 504 |
| 144 // Limit the number of sub-names we support (to constrain memory usage); | 505 // Limit the number of sub-names we support (to constrain memory usage); |
| 145 if (sub_names.size() > kMaxSupportedSubNames) | 506 if (sub_names.size() > kMaxSupportedSubNames) |
| 146 return {name_part}; | 507 return {name_part}; |
| 147 | 508 |
| 148 // Start with the empty string as a variant. | 509 // Start with the empty string as a variant. |
| 149 std::set<base::string16> variants = {base::EmptyString16()}; | 510 std::set<base::string16> variants = {base::EmptyString16()}; |
| 150 | 511 |
| 151 // For each sub-name, add a variant of all the already existing variants that | 512 // For each sub-name, add a variant of all the already existing variants that |
| 152 // appends this sub-name and one that appends the initial of this sub-name. | 513 // appends this sub-name and one that appends the initial of this sub-name. |
| 153 // Duplicates will be discarded when they're added to the variants set. | 514 // Duplicates will be discarded when they're added to the variants set. |
| 154 for (const base::string16& sub_name : sub_names) { | 515 for (const base::string16& sub_name : sub_names) { |
| 155 if (sub_name.empty()) continue; | 516 if (sub_name.empty()) |
| 517 continue; |
| 156 std::vector<base::string16> new_variants; | 518 std::vector<base::string16> new_variants; |
| 157 for (const base::string16& variant : variants) { | 519 for (const base::string16& variant : variants) { |
| 158 new_variants.push_back(base::CollapseWhitespace( | 520 new_variants.push_back(base::CollapseWhitespace( |
| 159 base::JoinString({variant, sub_name}, kSpace), true)); | 521 base::JoinString({variant, sub_name}, kSpace), true)); |
| 160 new_variants.push_back(base::CollapseWhitespace( | 522 new_variants.push_back(base::CollapseWhitespace( |
| 161 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true)); | 523 base::JoinString({variant, sub_name.substr(0, 1)}, kSpace), true)); |
| 162 } | 524 } |
| 163 variants.insert(new_variants.begin(), new_variants.end()); | 525 variants.insert(new_variants.begin(), new_variants.end()); |
| 164 } | 526 } |
| 165 | 527 |
| 166 // As a common case, also add the variant that just concatenates all of the | 528 // As a common case, also add the variant that just concatenates all of the |
| 167 // initials. | 529 // initials. |
| 168 base::string16 initials; | 530 base::string16 initials; |
| 169 for (const base::string16& sub_name : sub_names) { | 531 for (const base::string16& sub_name : sub_names) { |
| 170 if (sub_name.empty()) continue; | 532 if (sub_name.empty()) |
| 533 continue; |
| 171 initials.push_back(sub_name[0]); | 534 initials.push_back(sub_name[0]); |
| 172 } | 535 } |
| 173 variants.insert(initials); | 536 variants.insert(initials); |
| 174 | 537 |
| 175 // And, we're done. | 538 // And, we're done. |
| 176 return variants; | 539 return variants; |
| 177 } | 540 } |
| 178 | 541 |
| 179 bool AutofillProfileComparator::IsNameVariantOf( | 542 bool AutofillProfileComparator::IsNameVariantOf( |
| 180 const base::string16& full_name_1, | 543 const base::string16& full_name_1, |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 246 } | 609 } |
| 247 | 610 |
| 248 bool AutofillProfileComparator::HaveMergeableCompanyNames( | 611 bool AutofillProfileComparator::HaveMergeableCompanyNames( |
| 249 const AutofillProfile& p1, | 612 const AutofillProfile& p1, |
| 250 const AutofillProfile& p2) const { | 613 const AutofillProfile& p2) const { |
| 251 const base::string16& company_name_1 = NormalizeForComparison( | 614 const base::string16& company_name_1 = NormalizeForComparison( |
| 252 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); | 615 p1.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); |
| 253 const base::string16& company_name_2 = NormalizeForComparison( | 616 const base::string16& company_name_2 = NormalizeForComparison( |
| 254 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); | 617 p2.GetInfo(AutofillType(COMPANY_NAME), app_locale_)); |
| 255 return company_name_1.empty() || company_name_2.empty() || | 618 return company_name_1.empty() || company_name_2.empty() || |
| 256 HaveSameTokens(company_name_1, company_name_2); | 619 CompareTokens(company_name_1, company_name_2) != DIFFERENT_TOKENS; |
| 257 } | 620 } |
| 258 | 621 |
| 259 bool AutofillProfileComparator::HaveMergeablePhoneNumbers( | 622 bool AutofillProfileComparator::HaveMergeablePhoneNumbers( |
| 260 const AutofillProfile& p1, | 623 const AutofillProfile& p1, |
| 261 const AutofillProfile& p2) const { | 624 const AutofillProfile& p2) const { |
| 262 // We work with the raw phone numbers to avoid losing any helpful information | 625 // We work with the raw phone numbers to avoid losing any helpful information |
| 263 // as we parse. | 626 // as we parse. |
| 264 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); | 627 const base::string16& raw_phone_1 = p1.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); |
| 265 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); | 628 const base::string16& raw_phone_2 = p2.GetRawInfo(PHONE_HOME_WHOLE_NUMBER); |
| 266 | 629 |
| 267 // Are the two phone numbers trivially mergeable? | 630 // Are the two phone numbers trivially mergeable? |
| 268 if (raw_phone_1.empty() || raw_phone_2.empty() || | 631 if (raw_phone_1.empty() || raw_phone_2.empty() || |
| 269 raw_phone_1 == raw_phone_2) { | 632 raw_phone_1 == raw_phone_2) { |
| 270 return true; | 633 return true; |
| 271 } | 634 } |
| 272 | 635 |
| 273 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support | 636 // TODO(rogerm): Modify ::autofill::i18n::PhoneNumbersMatch to support |
| 274 // SHORT_NSN_MATCH and just call that instead of accessing the underlying | 637 // SHORT_NSN_MATCH and just call that instead of accessing the underlying |
| 275 // utility library directly? | 638 // utility library directly? |
| 276 | 639 |
| 277 // The phone number util library needs the numbers in utf8. | 640 // The phone number util library needs the numbers in utf8. |
| 278 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1); | 641 const std::string phone_1 = base::UTF16ToUTF8(raw_phone_1); |
| 279 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2); | 642 const std::string phone_2 = base::UTF16ToUTF8(raw_phone_2); |
| 280 | 643 |
| 281 // Parse and compare the phone numbers. | 644 // Parse and compare the phone numbers. |
| 282 using ::i18n::phonenumbers::PhoneNumberUtil; | |
| 283 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance(); | 645 PhoneNumberUtil* phone_util = PhoneNumberUtil::GetInstance(); |
| 284 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) { | 646 switch (phone_util->IsNumberMatchWithTwoStrings(phone_1, phone_2)) { |
| 285 case PhoneNumberUtil::INVALID_NUMBER: | 647 case PhoneNumberUtil::INVALID_NUMBER: |
| 286 case PhoneNumberUtil::NO_MATCH: | 648 case PhoneNumberUtil::NO_MATCH: |
| 287 return false; | 649 return false; |
| 288 case PhoneNumberUtil::SHORT_NSN_MATCH: | 650 case PhoneNumberUtil::SHORT_NSN_MATCH: |
| 289 case PhoneNumberUtil::NSN_MATCH: | 651 case PhoneNumberUtil::NSN_MATCH: |
| 290 case PhoneNumberUtil::EXACT_MATCH: | 652 case PhoneNumberUtil::EXACT_MATCH: |
| 291 return true; | 653 return true; |
| 292 } | 654 } |
| 293 | 655 |
| 294 NOTREACHED(); | 656 NOTREACHED(); |
| 295 return false; | 657 return false; |
| 296 } | 658 } |
| 297 | 659 |
| 298 bool AutofillProfileComparator::HaveMergeableAddresses( | 660 bool AutofillProfileComparator::HaveMergeableAddresses( |
| 299 const AutofillProfile& p1, | 661 const AutofillProfile& p1, |
| 300 const AutofillProfile& p2) const { | 662 const AutofillProfile& p2) const { |
| 301 // If the addresses are not in the same country, then they're not the same. If | 663 // If the addresses are not in the same country, then they're not the same. If |
| 302 // one of the address countries is unknown/invalid the comparison continues. | 664 // one of the address countries is unknown/invalid the comparison continues. |
| 303 const base::string16& country1 = p1.GetInfo( | 665 const AutofillType kCountryCode(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE); |
| 304 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_); | 666 const base::string16& country1 = p1.GetInfo(kCountryCode, app_locale_); |
| 305 const base::string16& country2 = p2.GetInfo( | 667 const base::string16& country2 = p2.GetInfo(kCountryCode, app_locale_); |
| 306 AutofillType(HTML_TYPE_COUNTRY_CODE, HTML_MODE_NONE), app_locale_); | |
| 307 if (!country1.empty() && !country2.empty() && | 668 if (!country1.empty() && !country2.empty() && |
| 308 !case_insensitive_compare_.StringsEqual(country1, country2)) { | 669 !case_insensitive_compare_.StringsEqual(country1, country2)) { |
| 309 return false; | 670 return false; |
| 310 } | 671 } |
| 311 | 672 |
| 312 // TODO(rogerm): Lookup the normalization rules for the (common) country of | 673 // TODO(rogerm): Lookup the normalization rules for the (common) country of |
| 313 // the address. The rules should be applied post NormalizeForComparison to | 674 // the address. The rules should be applied post NormalizeForComparison to |
| 314 // the state, city, and address bag of words comparisons. | 675 // the state, city, and address bag of words comparisons. |
| 315 | 676 |
| 316 // Zip | 677 // Zip |
| 317 // ---- | 678 // ---- |
| 318 // If the addresses are definitely not in the same zip/area code then we're | 679 // If the addresses are definitely not in the same zip/area code then we're |
| 319 // done. Otherwise, the comparison continues. | 680 // done. Otherwise, the comparison continues. |
| 681 const AutofillType kZipCode(ADDRESS_HOME_ZIP); |
| 320 const base::string16& zip1 = NormalizeForComparison( | 682 const base::string16& zip1 = NormalizeForComparison( |
| 321 p1.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), | 683 p1.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 322 DISCARD_WHITESPACE); | |
| 323 const base::string16& zip2 = NormalizeForComparison( | 684 const base::string16& zip2 = NormalizeForComparison( |
| 324 p2.GetInfo(AutofillType(ADDRESS_HOME_ZIP), app_locale_), | 685 p2.GetInfo(kZipCode, app_locale_), DISCARD_WHITESPACE); |
| 325 DISCARD_WHITESPACE); | |
| 326 if (!zip1.empty() && !zip2.empty() && | 686 if (!zip1.empty() && !zip2.empty() && |
| 327 zip1.find(zip2) == base::string16::npos && | 687 zip1.find(zip2) == base::string16::npos && |
| 328 zip2.find(zip1) == base::string16::npos) { | 688 zip2.find(zip1) == base::string16::npos) { |
| 329 return false; | 689 return false; |
| 330 } | 690 } |
| 331 | 691 |
| 332 // State | 692 // State |
| 333 // ------ | 693 // ------ |
| 334 // Heuristic: If the match is between non-empty zip codes then we can infer | 694 // Heuristic: States are mergeable if one is a (possibly empty) bag of words |
| 695 // subset of the other. |
| 696 // |
| 697 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| 335 // that the two state strings are intended to have the same meaning. This | 698 // that the two state strings are intended to have the same meaning. This |
| 336 // handles the cases where we have invalid or poorly formed data in one of the | 699 // handles the cases where we have invalid or poorly formed data in one of the |
| 337 // state values (like "Select one", or "CA - California"). Otherwise, we | 700 // state values (like "Select one", or "CA - California"). |
| 338 // actually have to check if the states map to the the same set of tokens. | 701 const AutofillType kState(ADDRESS_HOME_STATE); |
| 339 const base::string16& state1 = NormalizeForComparison( | 702 const base::string16& state1 = |
| 340 p1.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); | 703 NormalizeForComparison(p1.GetInfo(kState, app_locale_)); |
| 341 const base::string16& state2 = NormalizeForComparison( | 704 const base::string16& state2 = |
| 342 p2.GetInfo(AutofillType(ADDRESS_HOME_STATE), app_locale_)); | 705 NormalizeForComparison(p2.GetInfo(kState, app_locale_)); |
| 343 if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(state1, state2)) { | 706 if (!IsMatchingState(GetNonEmptyOf(p1, p2, kCountryCode), state1, state2) && |
| 707 CompareTokens(state1, state2) == DIFFERENT_TOKENS) { |
| 344 return false; | 708 return false; |
| 345 } | 709 } |
| 346 | 710 |
| 347 // City | 711 // City |
| 348 // ------ | 712 // ------ |
| 349 // Heuristic: If the match is between non-empty zip codes then we can infer | 713 // Heuristic: Cities are mergeable if one is a (possibly empty) bag of words |
| 714 // subset of the other. |
| 715 // |
| 716 // TODO(rogerm): If the match is between non-empty zip codes then we can infer |
| 350 // that the two city strings are intended to have the same meaning. This | 717 // that the two city strings are intended to have the same meaning. This |
| 351 // handles the cases where we have a city vs one of its suburbs. Otherwise, we | 718 // handles the cases where we have a city vs one of its suburbs. |
| 352 // actually have to check if the cities map to the the same set of tokens. | |
| 353 const base::string16& city1 = NormalizeForComparison( | 719 const base::string16& city1 = NormalizeForComparison( |
| 354 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); | 720 p1.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); |
| 355 const base::string16& city2 = NormalizeForComparison( | 721 const base::string16& city2 = NormalizeForComparison( |
| 356 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); | 722 p2.GetInfo(AutofillType(ADDRESS_HOME_CITY), app_locale_)); |
| 357 if ((zip1.empty() || zip2.empty()) && !HaveSameTokens(city1, city2)) { | 723 if (CompareTokens(city1, city2) == DIFFERENT_TOKENS) { |
| 358 return false; | 724 return false; |
| 359 } | 725 } |
| 360 | 726 |
| 361 // Address | 727 // Address |
| 362 // -------- | 728 // -------- |
| 363 // Heuristic: Use bag of words comparison on the post-normalized addresses. | 729 // Heuristic: Street addresses are mergeable if one is a (possibly empty) bag |
| 730 // of words subset of the other. |
| 364 const base::string16& address1 = NormalizeForComparison( | 731 const base::string16& address1 = NormalizeForComparison( |
| 365 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); | 732 p1.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); |
| 366 const base::string16& address2 = NormalizeForComparison( | 733 const base::string16& address2 = NormalizeForComparison( |
| 367 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); | 734 p2.GetInfo(AutofillType(ADDRESS_HOME_STREET_ADDRESS), app_locale_)); |
| 368 if (!HaveSameTokens(address1, address2)) { | 735 if (CompareTokens(address1, address2) == DIFFERENT_TOKENS) { |
| 369 return false; | 736 return false; |
| 370 } | 737 } |
| 371 | 738 |
| 372 return true; | 739 return true; |
| 373 } | 740 } |
| 374 | 741 |
| 742 bool AutofillProfileComparator::IsMatchingState( |
| 743 const base::string16& country_code, |
| 744 const base::string16& state1, |
| 745 const base::string16& state2) const { |
| 746 if (state1 == state2) |
| 747 return true; |
| 748 |
| 749 if (country_code != kUS) |
| 750 return false; |
| 751 |
| 752 // TODO(rogerm): Generalize this to all locales using string equivalence rules. |
| 753 base::string16 name, abbreviation; |
| 754 autofill::state_names::GetNameAndAbbreviation(state1, &name, &abbreviation); |
| 755 if (abbreviation.empty()) { |
| 756 // state1 wasn't recognized. There's no need to compare it to state2. |
| 757 return false; |
| 758 } |
| 759 |
| 760 return state2 == name || state2 == abbreviation; |
| 761 } |
| 762 |
| 375 } // namespace autofill | 763 } // namespace autofill |
| OLD | NEW |