OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "third_party/libaddressinput/chromium/input_suggester.h" |
| 6 |
| 7 #include <cstddef> |
| 8 #include <set> |
| 9 #include <utility> |
| 10 |
| 11 #include "base/logging.h" |
| 12 #include "third_party/libaddressinput/chromium/trie.h" |
| 13 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da
ta.h" |
| 14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h
" |
| 15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su
pplier.h" |
| 16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat
a.h" |
| 17 |
| 18 namespace autofill { |
| 19 |
| 20 using ::i18n::addressinput::AddressData; |
| 21 using ::i18n::addressinput::AddressField; |
| 22 using ::i18n::addressinput::BuildCallback; |
| 23 using ::i18n::addressinput::FieldProblemMap; |
| 24 using ::i18n::addressinput::PreloadSupplier; |
| 25 using ::i18n::addressinput::RegionData; |
| 26 using ::i18n::addressinput::RegionDataBuilder; |
| 27 |
| 28 using ::i18n::addressinput::ADMIN_AREA; |
| 29 using ::i18n::addressinput::COUNTRY; |
| 30 using ::i18n::addressinput::DEPENDENT_LOCALITY; |
| 31 using ::i18n::addressinput::LOCALITY; |
| 32 using ::i18n::addressinput::POSTAL_CODE; |
| 33 |
| 34 using ::i18n::addressinput::INVALID_FORMAT; |
| 35 using ::i18n::addressinput::MISMATCHING_VALUE; |
| 36 |
| 37 namespace { |
| 38 |
| 39 // Initial size for the buffer used in the canonicalizer. |
| 40 static const size_t kInitialBufferSize = 32; |
| 41 |
| 42 // A region and its metadata useful for constructing a suggestion. |
| 43 struct Suggestion { |
| 44 public: |
| 45 // Builds a suggestion of |region_to_suggest|. Does not take ownership of |
| 46 // |region_to_suggest|, which should not be NULL. |
| 47 Suggestion(const RegionData* region_to_suggest, |
| 48 AddressField matching_address_field, |
| 49 bool region_key_matches) |
| 50 : region_to_suggest(region_to_suggest), |
| 51 matching_address_field(matching_address_field), |
| 52 region_key_matches(region_key_matches) { |
| 53 DCHECK(region_to_suggest); |
| 54 } |
| 55 |
| 56 ~Suggestion() {} |
| 57 |
| 58 // The region that should be suggested. For example, if the region is ("CA", |
| 59 // "California"), then either "CA" or "California" should be suggested. |
| 60 const RegionData* region_to_suggest; |
| 61 |
| 62 // The field in the address for which the suggestion should be made. For |
| 63 // example, ADMIN_AREA in US means the suggestion should be made for the field |
| 64 // labeled "State". |
| 65 AddressField matching_address_field; |
| 66 |
| 67 // True if the key of the region matches user input (the name may or may not |
| 68 // match). "CA" should be suggested for a ("CA", "California") region. |
| 69 // |
| 70 // False if only the name of the region matches user input (the key does not |
| 71 // match). "California" should be suggested for a ("CA", "California") region. |
| 72 bool region_key_matches; |
| 73 }; |
| 74 |
| 75 // Suggestions for an address. Contains lists of suggestions for administrative |
| 76 // area, locality, and dependent locality fields of an address. |
| 77 class AddressSuggestions { |
| 78 public: |
| 79 AddressSuggestions() {} |
| 80 ~AddressSuggestions() {} |
| 81 |
| 82 // Marks all regions at |address_field| level as matching user input. |
| 83 void AllRegionsMatchForField(AddressField address_field) { |
| 84 all_regions_match_input_.insert(address_field); |
| 85 } |
| 86 |
| 87 // Marks given regions at |address_field| level as matching user input. The |
| 88 // |regions_match_key| parameter contains the regions that match user input by |
| 89 // their keys. The |regions_match_name| parameter contains the regions that |
| 90 // match user input by their names. |
| 91 // |
| 92 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or |
| 93 // DEPENDENT_LOCALITY. |
| 94 bool AddRegions(AddressField address_field, |
| 95 const std::set<const RegionData*>& regions_match_key, |
| 96 const std::set<const RegionData*>& regions_match_name) { |
| 97 DCHECK(address_field >= ADMIN_AREA); |
| 98 DCHECK(address_field <= DEPENDENT_LOCALITY); |
| 99 |
| 100 AddressField parent_address_field = |
| 101 static_cast<AddressField>(address_field - 1); |
| 102 |
| 103 bool all_parents_match = |
| 104 parent_address_field == COUNTRY || |
| 105 all_regions_match_input_.find(parent_address_field) != |
| 106 all_regions_match_input_.end(); |
| 107 |
| 108 // Cannot build |address_field| level suggestions if there are no matches in |
| 109 // |parent_address_field| level regions. |
| 110 const RegionsMatchInput* parents = NULL; |
| 111 if (address_field > ADMIN_AREA && !all_parents_match) { |
| 112 parents = ®ions_match_input_[parent_address_field]; |
| 113 if (parents->keys.empty() && parents->names.empty()) |
| 114 return false; |
| 115 } |
| 116 |
| 117 RegionsMatchInput* regions = NULL; |
| 118 if (address_field < DEPENDENT_LOCALITY) |
| 119 regions = ®ions_match_input_[address_field]; |
| 120 |
| 121 std::vector<Suggestion>* suggestions = &suggestions_[address_field]; |
| 122 bool added_suggestions = false; |
| 123 |
| 124 // Iterate over both |regions_match_key| and |regions_match_name| and build |
| 125 // Suggestion objects based on the given RegionData objects. Advance either |
| 126 // one iterator at a time (if they point to different data) or both |
| 127 // iterators at once (if they point to the same data). |
| 128 for (std::set<const RegionData*>::const_iterator |
| 129 key_it = regions_match_key.begin(), |
| 130 name_it = regions_match_name.begin(); |
| 131 key_it != regions_match_key.end() || |
| 132 name_it != regions_match_name.end();) { |
| 133 const RegionData* key_region = |
| 134 key_it != regions_match_key.end() ? *key_it : NULL; |
| 135 const RegionData* name_region = |
| 136 name_it != regions_match_name.end() ? *name_it : NULL; |
| 137 |
| 138 // Regions that do not have a parent that also matches input will not |
| 139 // become suggestions. |
| 140 bool key_region_has_parent = |
| 141 all_parents_match || |
| 142 (parents && !parents->keys.empty() && key_region && |
| 143 parents->keys.find(&key_region->parent()) != parents->keys.end()); |
| 144 bool name_region_has_parent = |
| 145 all_parents_match || |
| 146 (parents && !parents->names.empty() && name_region && |
| 147 parents->names.find(&name_region->parent()) != parents->names.end()); |
| 148 |
| 149 if (name_region && (!key_region || name_region < key_region)) { |
| 150 if (name_region_has_parent) { |
| 151 suggestions->push_back(Suggestion(name_region, address_field, false)); |
| 152 added_suggestions = true; |
| 153 if (regions) |
| 154 regions->names.insert(name_region); |
| 155 } |
| 156 |
| 157 ++name_it; |
| 158 } else if (key_region && (!name_region || key_region < name_region)) { |
| 159 if (key_region_has_parent) { |
| 160 suggestions->push_back(Suggestion(key_region, address_field, true)); |
| 161 added_suggestions = true; |
| 162 if (regions) |
| 163 regions->keys.insert(key_region); |
| 164 } |
| 165 |
| 166 ++key_it; |
| 167 } else { |
| 168 if (key_region_has_parent) { |
| 169 suggestions->push_back(Suggestion(key_region, address_field, true)); |
| 170 added_suggestions = true; |
| 171 if (regions) { |
| 172 regions->keys.insert(key_region); |
| 173 regions->names.insert(name_region); |
| 174 } |
| 175 } |
| 176 |
| 177 ++key_it; |
| 178 ++name_it; |
| 179 } |
| 180 } |
| 181 |
| 182 return added_suggestions; |
| 183 } |
| 184 |
| 185 // Swaps the suggestions for the smallest sub-region into |suggestions|. |
| 186 // |this| is not usable after this call due to using the swap() operation. |
| 187 // |
| 188 // The |suggestions| parameter should not be NULL. |
| 189 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) { |
| 190 DCHECK(suggestions); |
| 191 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) { |
| 192 std::vector<Suggestion>* result = |
| 193 &suggestions_[static_cast<AddressField>(i)]; |
| 194 if (!result->empty()) { |
| 195 suggestions->swap(*result); |
| 196 return; |
| 197 } |
| 198 } |
| 199 } |
| 200 |
| 201 private: |
| 202 // The sets of non-owned regions used for looking up regions that match user |
| 203 // input by keys and names. |
| 204 struct RegionsMatchInput { |
| 205 std::set<const RegionData*> keys; |
| 206 std::set<const RegionData*> names; |
| 207 }; |
| 208 |
| 209 // The regions that match user input at ADMIN_AREA and LOCALITY levels. |
| 210 std::map<AddressField, RegionsMatchInput> regions_match_input_; |
| 211 |
| 212 // The set of fields for which all regions match user input. Used to avoid |
| 213 // storing a long list in |regions_match_input_| and later looking it up |
| 214 // there. |
| 215 std::set<AddressField> all_regions_match_input_; |
| 216 |
| 217 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels. |
| 218 std::map<AddressField, std::vector<Suggestion> > suggestions_; |
| 219 |
| 220 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions); |
| 221 }; |
| 222 |
| 223 } // namespace |
| 224 |
| 225 InputSuggester::StringCanonicalizer::StringCanonicalizer() |
| 226 : buffer_(kInitialBufferSize, 0) { |
| 227 UErrorCode error_code = U_ZERO_ERROR; |
| 228 collator_.reset( |
| 229 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); |
| 230 DCHECK(U_SUCCESS(error_code)); |
| 231 collator_->setStrength(icu::Collator::PRIMARY); |
| 232 } |
| 233 |
| 234 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {} |
| 235 |
| 236 const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize( |
| 237 const std::string& original) const { |
| 238 DCHECK(!original.empty()); |
| 239 |
| 240 icu::UnicodeString icu_str(original.c_str(), |
| 241 static_cast<int32_t>(original.length())); |
| 242 int32_t sort_key_size = |
| 243 collator_->getSortKey(icu_str, &buffer_[0], buffer_size()); |
| 244 DCHECK_LT(0, sort_key_size); |
| 245 |
| 246 if (sort_key_size > buffer_size()) { |
| 247 buffer_.resize(sort_key_size * 2, 0); |
| 248 sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size()); |
| 249 DCHECK_LT(0, sort_key_size); |
| 250 DCHECK_GT(buffer_size(), sort_key_size); |
| 251 } |
| 252 |
| 253 return buffer_; |
| 254 } |
| 255 |
| 256 int32_t InputSuggester::StringCanonicalizer::buffer_size() const { |
| 257 return static_cast<int32_t>(buffer_.size()); |
| 258 } |
| 259 |
| 260 // All sub-regions of a COUNTRY level region, organized into tries for lookup by |
| 261 // region name or key. |
| 262 class InputSuggester::SubRegionData { |
| 263 public: |
| 264 SubRegionData() |
| 265 : initialized_(false), |
| 266 smallest_region_size_(COUNTRY), |
| 267 canonicalizer_(NULL) {} |
| 268 |
| 269 ~SubRegionData() {} |
| 270 |
| 271 bool is_initialized() const { return initialized_; } |
| 272 |
| 273 // Adds the sub-regions of |country_region| into tries. Uses |
| 274 // |shared_canonicalizer| for case and diacritic insensitive lookup of the |
| 275 // sub-regions. Should be called at most once. |
| 276 void Initialize(const RegionData& country_region, |
| 277 const StringCanonicalizer& shared_canonicalizer) { |
| 278 DCHECK(!initialized_); |
| 279 DCHECK(!country_region.has_parent()); |
| 280 |
| 281 initialized_ = true; |
| 282 canonicalizer_ = &shared_canonicalizer; |
| 283 |
| 284 if (!country_region.sub_regions().empty()) |
| 285 AddSubRegionsOf(country_region, COUNTRY); |
| 286 } |
| 287 |
| 288 // Adds the suggestions for |user_input| into |suggestions| when user is |
| 289 // typing in |focused_field|. |
| 290 void BuildSuggestions(const AddressData& user_input, |
| 291 AddressField focused_field, |
| 292 std::vector<Suggestion>* suggestions) { |
| 293 DCHECK(initialized_); |
| 294 |
| 295 // Do not suggest anything if there's no suggestion data for the focused |
| 296 // field. |
| 297 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field) |
| 298 return; |
| 299 |
| 300 // Non-owned regions that match a field value by region key. |
| 301 std::set<const RegionData*> regions_match_key; |
| 302 |
| 303 // Non-owned regions that match a field value by region name. |
| 304 std::set<const RegionData*> regions_match_name; |
| 305 |
| 306 AddressSuggestions address_suggestions; |
| 307 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY; |
| 308 ++i) { |
| 309 AddressField address_field = static_cast<AddressField>(i); |
| 310 AddressField parent_address_field = static_cast<AddressField>(i - 1); |
| 311 |
| 312 const std::string& field_value = user_input.GetFieldValue(address_field); |
| 313 const std::string& parent_field_value = |
| 314 user_input.GetFieldValue(parent_address_field); |
| 315 |
| 316 if (field_value.empty() && |
| 317 (address_field == ADMIN_AREA || parent_field_value.empty())) { |
| 318 address_suggestions.AllRegionsMatchForField(address_field); |
| 319 continue; |
| 320 } |
| 321 |
| 322 if (field_value.empty()) { |
| 323 DCHECK_NE(address_field, focused_field); |
| 324 continue; |
| 325 } |
| 326 |
| 327 regions_match_key.clear(); |
| 328 regions_match_name.clear(); |
| 329 |
| 330 const FieldTries& field_tries = field_tries_[address_field]; |
| 331 |
| 332 const std::vector<uint8_t>& canonicalized_value = |
| 333 canonicalizer_->Canonicalize(field_value); |
| 334 |
| 335 field_tries.keys.FindDataForKeyPrefix(canonicalized_value, |
| 336 ®ions_match_key); |
| 337 field_tries.names.FindDataForKeyPrefix(canonicalized_value, |
| 338 ®ions_match_name); |
| 339 |
| 340 bool added_suggestions = address_suggestions.AddRegions( |
| 341 address_field, regions_match_key, regions_match_name); |
| 342 |
| 343 // Do not suggest anything if the focused field does not have suggestions. |
| 344 if (address_field == focused_field && !added_suggestions) |
| 345 return; |
| 346 } |
| 347 |
| 348 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions); |
| 349 } |
| 350 |
| 351 private: |
| 352 // The tries to lookup regions for a specific field by keys and names. For |
| 353 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK", |
| 354 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The |
| 355 // struct is uncopyable due to Trie objects being uncopyable. |
| 356 struct FieldTries { |
| 357 Trie<const RegionData*> keys; |
| 358 Trie<const RegionData*> names; |
| 359 }; |
| 360 |
| 361 // Adds the sub-regions of |parent_region| into tries. |
| 362 void AddSubRegionsOf(const RegionData& parent_region, |
| 363 AddressField parent_field) { |
| 364 DCHECK(!parent_region.sub_regions().empty()); |
| 365 |
| 366 AddressField address_field = static_cast<AddressField>(parent_field + 1); |
| 367 DCHECK(address_field >= ADMIN_AREA); |
| 368 DCHECK(address_field <= DEPENDENT_LOCALITY); |
| 369 |
| 370 FieldTries* field_tries = &field_tries_[address_field]; |
| 371 for (std::vector<const RegionData*>::const_iterator it = |
| 372 parent_region.sub_regions().begin(); |
| 373 it != parent_region.sub_regions().end(); |
| 374 ++it) { |
| 375 const RegionData* region = *it; |
| 376 DCHECK(region); |
| 377 DCHECK(!region->key().empty()); |
| 378 DCHECK(!region->name().empty()); |
| 379 |
| 380 field_tries->keys.AddDataForKey( |
| 381 canonicalizer_->Canonicalize(region->key()), region); |
| 382 |
| 383 field_tries->names.AddDataForKey( |
| 384 canonicalizer_->Canonicalize(region->name()), region); |
| 385 |
| 386 if (smallest_region_size_ < address_field) |
| 387 smallest_region_size_ = address_field; |
| 388 |
| 389 if (!region->sub_regions().empty()) |
| 390 AddSubRegionsOf(*region, address_field); |
| 391 } |
| 392 } |
| 393 |
| 394 // True after Initialize() has been called. |
| 395 bool initialized_; |
| 396 |
| 397 // The tries to lookup regions for ADMIN_AREA, LOCALITY, and |
| 398 // DEPENDENT_LOCALITY. |
| 399 std::map<AddressField, FieldTries> field_tries_; |
| 400 |
| 401 // The smallest size of a sub-region that has data. For example, this is |
| 402 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN. |
| 403 AddressField smallest_region_size_; |
| 404 |
| 405 // A shared instance of string canonicalizer for case and diacritic comparison |
| 406 // of region keys and names. |
| 407 const StringCanonicalizer* canonicalizer_; |
| 408 }; |
| 409 |
| 410 InputSuggester::InputSuggester(PreloadSupplier* supplier) |
| 411 : region_data_builder_(supplier), |
| 412 input_helper_(supplier), |
| 413 validator_(supplier), |
| 414 validated_(BuildCallback(this, &InputSuggester::Validated)) {} |
| 415 |
| 416 InputSuggester::~InputSuggester() {} |
| 417 |
| 418 void InputSuggester::GetSuggestions(const AddressData& user_input, |
| 419 AddressField focused_field, |
| 420 size_t suggestions_limit, |
| 421 std::vector<AddressData>* suggestions) { |
| 422 DCHECK(suggestions); |
| 423 DCHECK(focused_field == POSTAL_CODE || |
| 424 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY)); |
| 425 |
| 426 AddressData address_copy = user_input; |
| 427 |
| 428 // Do not suggest anything if the user input is empty. |
| 429 if (address_copy.IsFieldEmpty(focused_field)) |
| 430 return; |
| 431 |
| 432 if (focused_field == POSTAL_CODE) { |
| 433 // Do not suggest anything if the user is typing an invalid postal code. |
| 434 FieldProblemMap problems; |
| 435 FieldProblemMap filter; |
| 436 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT)); |
| 437 validator_.Validate(address_copy, |
| 438 true, // Allow postal office boxes. |
| 439 false, // Do not require recipient name. |
| 440 &filter, |
| 441 &problems, |
| 442 *validated_); |
| 443 if (!problems.empty()) |
| 444 return; |
| 445 |
| 446 // Fill in the sub-regions based on the postal code. |
| 447 input_helper_.FillAddress(&address_copy); |
| 448 } |
| 449 |
| 450 // Lazily initialize the mapping from COUNTRY level regions to all of their |
| 451 // sub-regions with metadata for generating suggestions. |
| 452 std::string unused_best_language; |
| 453 const RegionData& region_data = |
| 454 region_data_builder_.Build(address_copy.region_code, |
| 455 address_copy.language_code, |
| 456 &unused_best_language); |
| 457 SubRegionData* sub_region_data = &sub_regions_[®ion_data]; |
| 458 if (!sub_region_data->is_initialized()) |
| 459 sub_region_data->Initialize(region_data, canonicalizer_); |
| 460 |
| 461 // Build the list of regions that match |address_copy| when the user is typing |
| 462 // in the |focused_field|. |
| 463 std::vector<Suggestion> suggested_regions; |
| 464 sub_region_data->BuildSuggestions( |
| 465 address_copy, focused_field, &suggested_regions); |
| 466 |
| 467 FieldProblemMap problems; |
| 468 FieldProblemMap filter; |
| 469 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE)); |
| 470 |
| 471 // Generate suggestions based on the regions. |
| 472 for (std::vector<Suggestion>::const_iterator suggested_region_it = |
| 473 suggested_regions.begin(); |
| 474 suggested_region_it != suggested_regions.end(); |
| 475 ++suggested_region_it) { |
| 476 AddressData address; |
| 477 address.region_code = address_copy.region_code; |
| 478 address.postal_code = address_copy.postal_code; |
| 479 |
| 480 // Traverse the tree of regions from the smallest |region_to_suggest| to the |
| 481 // country-wide "root" of the tree. Use the region names or keys found at |
| 482 // each of the levels of the tree to build the |address| to suggest. |
| 483 AddressField address_field = suggested_region_it->matching_address_field; |
| 484 for (const RegionData* region = suggested_region_it->region_to_suggest; |
| 485 region->has_parent(); |
| 486 region = ®ion->parent()) { |
| 487 address.SetFieldValue(address_field, |
| 488 suggested_region_it->region_key_matches |
| 489 ? region->key() |
| 490 : region->name()); |
| 491 address_field = static_cast<AddressField>(address_field - 1); |
| 492 } |
| 493 |
| 494 // Do not suggest an address with a mismatching postal code. |
| 495 problems.clear(); |
| 496 validator_.Validate(address, |
| 497 true, // Allow postal office boxes. |
| 498 false, // Do not require recipient name. |
| 499 &filter, |
| 500 &problems, |
| 501 *validated_); |
| 502 if (!problems.empty()) |
| 503 continue; |
| 504 |
| 505 // Do not add more suggestions than |suggestions_limit|. |
| 506 if (suggestions->size() >= suggestions_limit) { |
| 507 suggestions->clear(); |
| 508 return; |
| 509 } |
| 510 |
| 511 suggestions->push_back(address); |
| 512 } |
| 513 } |
| 514 |
| 515 void InputSuggester::Validated(bool success, |
| 516 const AddressData&, |
| 517 const FieldProblemMap&) { |
| 518 DCHECK(success); |
| 519 } |
| 520 |
| 521 } // namespace autofill |
OLD | NEW |