OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "third_party/libaddressinput/chromium/input_suggester.h" |
| 6 |
| 7 #include <map> |
| 8 #include <set> |
| 9 #include <utility> |
| 10 |
| 11 #include "base/basictypes.h" |
| 12 #include "base/logging.h" |
| 13 #include "base/memory/scoped_ptr.h" |
| 14 #include "base/memory/scoped_vector.h" |
| 15 #include "base/stl_util.h" |
| 16 #include "third_party/icu/source/i18n/unicode/coll.h" |
| 17 #include "third_party/libaddressinput/chromium/trie.h" |
| 18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da
ta.h" |
| 19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat
a.h" |
| 20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat
a_builder.h" |
| 21 |
| 22 namespace autofill { |
| 23 |
| 24 using ::i18n::addressinput::AddressData; |
| 25 using ::i18n::addressinput::AddressField; |
| 26 using ::i18n::addressinput::PreloadSupplier; |
| 27 using ::i18n::addressinput::RegionData; |
| 28 using ::i18n::addressinput::RegionDataBuilder; |
| 29 |
| 30 using ::i18n::addressinput::ADMIN_AREA; |
| 31 using ::i18n::addressinput::COUNTRY; |
| 32 using ::i18n::addressinput::DEPENDENT_LOCALITY; |
| 33 using ::i18n::addressinput::LOCALITY; |
| 34 using ::i18n::addressinput::POSTAL_CODE; |
| 35 |
| 36 namespace { |
| 37 |
| 38 // A region and its metadata useful for constructing a suggestion. The object is |
| 39 // immutable and uncopyable. |
| 40 struct Suggestion { |
| 41 public: |
| 42 // Builds a suggestion of |region_to_suggest|. Does not take ownership of |
| 43 // |region_to_suggest|, which should not be NULL. At least one of |
| 44 // |region_key_matches| and |region_name_names| should be true, otherwise it's |
| 45 // not a valid suggestion. |
| 46 Suggestion(const RegionData* region_to_suggest, |
| 47 AddressField matching_address_field, |
| 48 bool region_key_matches, |
| 49 bool region_name_matches) |
| 50 : region_to_suggest(region_to_suggest), |
| 51 matching_address_field(matching_address_field), |
| 52 region_key_matches(region_key_matches), |
| 53 region_name_matches(region_name_matches) { |
| 54 DCHECK(region_to_suggest); |
| 55 DCHECK(region_key_matches || region_name_matches); |
| 56 } |
| 57 |
| 58 ~Suggestion() {} |
| 59 |
| 60 // The region that should be suggested. For example, if |
| 61 // |region_to_suggest->name()| is "California", then "California" or "CA" |
| 62 // can be suggested. |
| 63 const RegionData* const region_to_suggest; |
| 64 |
| 65 // The field in the address for which the suggestion should be made. For |
| 66 // example, ADMIN_AREA in US means the suggestion should be made for the field |
| 67 // labeled "State". |
| 68 const AddressField matching_address_field; |
| 69 |
| 70 // True if the the key of the region matches user input and can be used in |
| 71 // suggestion. For example, if this is true and |region_to_suggest->key()| is |
| 72 // "CA", then "CA" cab be suggested. |
| 73 const bool region_key_matches; |
| 74 |
| 75 // True if the name of the region matches user input and can be used in |
| 76 // suggestion. For example, if this is true and |region_to_suggest->name()| is |
| 77 // "California", then "California" can be suggested. |
| 78 const bool region_name_matches; |
| 79 }; |
| 80 |
| 81 // Suggestions for an address. Contains lists of suggestions for every field in |
| 82 // an address. |
| 83 class AddressSuggestions { |
| 84 public: |
| 85 AddressSuggestions() { |
| 86 for (int i = ADMIN_AREA; i <= LOCALITY; ++i) |
| 87 parent_regions_[static_cast<AddressField>(i)] = new ParentRegions; |
| 88 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i) |
| 89 suggestions_[static_cast<AddressField>(i)] = new ScopedVector<Suggestion>; |
| 90 } |
| 91 |
| 92 ~AddressSuggestions() { |
| 93 STLDeleteValues(&parent_regions_); |
| 94 STLDeleteValues(&suggestions_); |
| 95 } |
| 96 |
| 97 // Marks all regions at |address_field| level as matching user input. |
| 98 void AllRegionsMatchForField(AddressField address_field) { |
| 99 all_parent_regions_match_.insert(address_field); |
| 100 } |
| 101 |
| 102 // Marks given regions at |address_field| level as matching user input. The |
| 103 // |regions_match_key| parameter contains the regions that match user input by |
| 104 // their keys. The |regions_match_name| parameter---by their names. |
| 105 // |
| 106 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or |
| 107 // DEPENDENT_LOCALITY. |
| 108 bool AddRegions(AddressField address_field, |
| 109 const std::set<const RegionData*>& regions_match_key, |
| 110 const std::set<const RegionData*>& regions_match_name) { |
| 111 DCHECK(address_field >= ADMIN_AREA); |
| 112 DCHECK(address_field <= DEPENDENT_LOCALITY); |
| 113 |
| 114 AddressField parent_address_field = |
| 115 static_cast<AddressField>(address_field - 1); |
| 116 |
| 117 bool all_parents_match = |
| 118 parent_address_field == COUNTRY || |
| 119 all_parent_regions_match_.find(parent_address_field) != |
| 120 all_parent_regions_match_.end(); |
| 121 |
| 122 // Cannot build |address_field| level suggestions if none of there are no |
| 123 // matches in |parent_address_field| level regions. |
| 124 const ParentRegions* parents = NULL; |
| 125 if (address_field > ADMIN_AREA && !all_parents_match) { |
| 126 parents = parent_regions_[parent_address_field]; |
| 127 if (parents->keys.empty() && parents->names.empty()) |
| 128 return false; |
| 129 } |
| 130 |
| 131 ParentRegions* regions = NULL; |
| 132 if (address_field < DEPENDENT_LOCALITY) |
| 133 regions = parent_regions_[address_field]; |
| 134 |
| 135 ScopedVector<Suggestion>* suggestions = suggestions_[address_field]; |
| 136 bool added_suggestions = false; |
| 137 |
| 138 // Iterate over both |regions_match_key| and |regions_match_name|. Advance |
| 139 // either one iterator at a time (if they point to different data) or both |
| 140 // iterators at once (if they point to the same data). |
| 141 for (std::set<const RegionData*>::const_iterator |
| 142 key_it = regions_match_key.begin(), |
| 143 name_it = regions_match_name.begin(); |
| 144 key_it != regions_match_key.end() || |
| 145 name_it != regions_match_name.end();) { |
| 146 const RegionData* key_region = |
| 147 key_it != regions_match_key.end() ? *key_it : NULL; |
| 148 const RegionData* name_region = |
| 149 name_it != regions_match_name.end() ? *name_it : NULL; |
| 150 |
| 151 // Regions that do not have a parent that also matches input will not |
| 152 // become a suggestion. |
| 153 bool key_region_has_parent = |
| 154 all_parents_match || |
| 155 (parents && !parents->keys.empty() && key_region && |
| 156 parents->keys.find(&key_region->parent()) != parents->keys.end()); |
| 157 bool name_region_has_parent = |
| 158 all_parents_match || |
| 159 (parents && !parents->names.empty() && name_region && |
| 160 parents->names.find(&name_region->parent()) != parents->names.end()); |
| 161 |
| 162 if (name_region && (!key_region || name_region < key_region)) { |
| 163 if (name_region_has_parent) { |
| 164 suggestions->push_back( |
| 165 new Suggestion(name_region, address_field, false, true)); |
| 166 added_suggestions = true; |
| 167 if (regions) |
| 168 regions->names.insert(name_region); |
| 169 } |
| 170 |
| 171 ++name_it; |
| 172 } else if (key_region && (!name_region || key_region < name_region)) { |
| 173 if (key_region_has_parent) { |
| 174 suggestions->push_back( |
| 175 new Suggestion(key_region, address_field, true, false)); |
| 176 added_suggestions = true; |
| 177 if (regions) |
| 178 regions->keys.insert(key_region); |
| 179 } |
| 180 |
| 181 ++key_it; |
| 182 } else { |
| 183 if (key_region_has_parent) { |
| 184 suggestions->push_back( |
| 185 new Suggestion(key_region, address_field, true, true)); |
| 186 added_suggestions = true; |
| 187 if (regions) { |
| 188 regions->keys.insert(key_region); |
| 189 regions->names.insert(name_region); |
| 190 } |
| 191 } |
| 192 |
| 193 ++key_it; |
| 194 ++name_it; |
| 195 } |
| 196 } |
| 197 |
| 198 return added_suggestions; |
| 199 } |
| 200 |
| 201 // Swaps the suggestions for the smallest sub-region into |suggestions|. This |
| 202 // object is not usable after this call due to using the swap() operation. |
| 203 // |
| 204 // The |suggestions| parameter should not be NULL. |
| 205 void SwapSmallestSubRegionSuggestions(ScopedVector<Suggestion>* suggestions) { |
| 206 DCHECK(suggestions); |
| 207 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) { |
| 208 ScopedVector<Suggestion>* result = |
| 209 suggestions_[static_cast<AddressField>(i)]; |
| 210 if (!result->empty()) { |
| 211 result->swap(*suggestions); |
| 212 return; |
| 213 } |
| 214 } |
| 215 } |
| 216 |
| 217 private: |
| 218 // The sets of non-owned regions used for fast parent region lookup. |
| 219 struct ParentRegions { |
| 220 // Regions that match user input by key. |
| 221 std::set<const RegionData*> keys; |
| 222 |
| 223 // Regions that match user input by name. |
| 224 std::set<const RegionData*> names; |
| 225 }; |
| 226 |
| 227 // The owned sets of non-owned regions for past parent region lookup at |
| 228 // ADMIN_AREA and LOCALITY levels. |
| 229 std::map<AddressField, ParentRegions*> parent_regions_; |
| 230 |
| 231 // The set of fields for which all sub-regions match user input. Used to avoid |
| 232 // storing a long list in |parent_regions_| and later looking it up there. |
| 233 std::set<AddressField> all_parent_regions_match_; |
| 234 |
| 235 // The owned vectors of suggestions at ADMIN_AREA, LOCALITY, and |
| 236 // DEPENDENT_LOCALITY levels. |
| 237 std::map<AddressField, ScopedVector<Suggestion>*> suggestions_; |
| 238 |
| 239 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions); |
| 240 }; |
| 241 |
| 242 } // namespace |
| 243 |
| 244 // Canonicalizes strings for fast case and diacritic insensitive comparison. |
| 245 class StringCanonicalizer { |
| 246 public: |
| 247 // Initializes the canonicalizer. This is slow, so avoid calling it more often |
| 248 // than necessary. |
| 249 StringCanonicalizer() { |
| 250 UErrorCode error_code = U_ZERO_ERROR; |
| 251 collator_.reset( |
| 252 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); |
| 253 DCHECK(U_SUCCESS(error_code)); |
| 254 collator_->setStrength(icu::Collator::PRIMARY); |
| 255 } |
| 256 |
| 257 ~StringCanonicalizer() {} |
| 258 |
| 259 // Returns a canonical version of the string that can be used for comparing |
| 260 // strings regardless of diacritics and capitalization. |
| 261 // Canonicalize("Texas") == Canonicalize("T\u00E9xas"); |
| 262 // Canonicalize("Texas") == Canonicalize("teXas"); |
| 263 // Canonicalize("Texas") != Canonicalize("California"); |
| 264 // |
| 265 // The output is not human-readable. |
| 266 // Canonicalize("Texas") != "Texas"; |
| 267 std::string Canonicalize(const std::string& original) const { |
| 268 icu::UnicodeString icu_str(original.c_str(), |
| 269 static_cast<int32_t>(original.length())); |
| 270 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); |
| 271 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); |
| 272 DCHECK(buffer.get()); |
| 273 int32_t filled_size = |
| 274 collator_->getSortKey(icu_str, buffer.get(), buffer_size); |
| 275 DCHECK_EQ(buffer_size, filled_size); |
| 276 return std::string(reinterpret_cast<const char*>(buffer.get())); |
| 277 } |
| 278 |
| 279 private: |
| 280 scoped_ptr<icu::Collator> collator_; |
| 281 |
| 282 DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer); |
| 283 }; |
| 284 |
| 285 // All sub-regions of a COUNTRY level region with metadata useful for |
| 286 // constructing suggestions. |
| 287 class SubRegionData { |
| 288 public: |
| 289 SubRegionData(const RegionData& country_region, |
| 290 const StringCanonicalizer& shared_canonicalizer) |
| 291 : canonicalizer_(shared_canonicalizer), smallest_region_size_(COUNTRY) { |
| 292 DCHECK(!country_region.has_parent()); |
| 293 |
| 294 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i) |
| 295 field_data_[static_cast<AddressField>(i)] = new FieldData; |
| 296 |
| 297 if (!country_region.sub_regions().empty()) |
| 298 AddSubRegionsOf(country_region, COUNTRY); |
| 299 } |
| 300 |
| 301 ~SubRegionData() { STLDeleteValues(&field_data_); } |
| 302 |
| 303 void BuildSuggestions(const AddressData& user_input, |
| 304 AddressField focused_field, |
| 305 ScopedVector<Suggestion>* results) const { |
| 306 // Do not suggest anything if there's no suggestion data for the focused |
| 307 // field. |
| 308 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field) |
| 309 return; |
| 310 |
| 311 // Canonicalized user input data for lookup in the tries. |
| 312 AddressData canonical_input = user_input; |
| 313 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i) { |
| 314 AddressField address_field = static_cast<AddressField>(i); |
| 315 const std::string& field_value = user_input.GetFieldValue(address_field); |
| 316 if (!field_value.empty()) { |
| 317 canonical_input.SetFieldValue(address_field, |
| 318 canonicalizer_.Canonicalize(field_value)); |
| 319 } |
| 320 } |
| 321 |
| 322 // Non-owned regions that match a field value in user input by region key or |
| 323 // name. |
| 324 std::set<const RegionData*> regions_match_key; |
| 325 std::set<const RegionData*> regions_match_name; |
| 326 |
| 327 AddressSuggestions suggestions; |
| 328 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY; |
| 329 ++i) { |
| 330 AddressField address_field = static_cast<AddressField>(i); |
| 331 AddressField parent_address_field = static_cast<AddressField>(i - 1); |
| 332 |
| 333 const std::string& canonical_field_value = |
| 334 canonical_input.GetFieldValue(address_field); |
| 335 |
| 336 if (canonical_field_value.empty() && |
| 337 (address_field == ADMIN_AREA || |
| 338 canonical_input.GetFieldValue(parent_address_field).empty())) { |
| 339 suggestions.AllRegionsMatchForField(address_field); |
| 340 continue; |
| 341 } |
| 342 |
| 343 regions_match_key.clear(); |
| 344 regions_match_name.clear(); |
| 345 |
| 346 const FieldData* field_data = field_data_.find(address_field)->second; |
| 347 field_data->keys.FindDataForKeyPrefix(canonical_field_value, |
| 348 ®ions_match_key); |
| 349 field_data->names.FindDataForKeyPrefix(canonical_field_value, |
| 350 ®ions_match_name); |
| 351 |
| 352 bool added_suggestions = suggestions.AddRegions( |
| 353 address_field, regions_match_key, regions_match_name); |
| 354 |
| 355 // Do not suggest anything if the focused field does not have suggestions. |
| 356 if (address_field == focused_field && !added_suggestions) |
| 357 return; |
| 358 } |
| 359 |
| 360 suggestions.SwapSmallestSubRegionSuggestions(results); |
| 361 } |
| 362 |
| 363 private: |
| 364 // The tries to lookup regions for a specific field by keys and names. For |
| 365 // example, the FieldData for ADMIN_AREA in US will have keys for "AL", "AK", |
| 366 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The |
| 367 // struct is uncopyable due to Trie objects being uncopyable. |
| 368 struct FieldData { |
| 369 Trie<const RegionData*> keys; |
| 370 Trie<const RegionData*> names; |
| 371 }; |
| 372 |
| 373 void AddSubRegionsOf(const RegionData& parent_region, |
| 374 AddressField parent_field) { |
| 375 DCHECK(!parent_region.sub_regions().empty()); |
| 376 |
| 377 AddressField field = static_cast<AddressField>(parent_field + 1); |
| 378 DCHECK(field >= ADMIN_AREA); |
| 379 DCHECK(field <= DEPENDENT_LOCALITY); |
| 380 |
| 381 FieldData* field_data = field_data_[field]; |
| 382 DCHECK(field_data); |
| 383 |
| 384 for (std::vector<const RegionData*>::const_iterator it = |
| 385 parent_region.sub_regions().begin(); |
| 386 it != parent_region.sub_regions().end(); |
| 387 ++it) { |
| 388 const RegionData* region = *it; |
| 389 DCHECK(region); |
| 390 |
| 391 field_data->keys.AddDataForKey(canonicalizer_.Canonicalize(region->key()), |
| 392 region); |
| 393 field_data->names.AddDataForKey( |
| 394 canonicalizer_.Canonicalize(region->name()), region); |
| 395 |
| 396 if (smallest_region_size_ < field) |
| 397 smallest_region_size_ = field; |
| 398 |
| 399 if (!region->sub_regions().empty()) |
| 400 AddSubRegionsOf(*region, field); |
| 401 } |
| 402 } |
| 403 |
| 404 // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and |
| 405 // DEPENDENT_LOCALITY. |
| 406 std::map<AddressField, FieldData*> field_data_; |
| 407 |
| 408 // The smallest size of a sub-region that has data. For example, this is |
| 409 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN. |
| 410 AddressField smallest_region_size_; |
| 411 |
| 412 // A shared instance of string canonicalizer for case and diacritic comparison |
| 413 // of region keys and names. |
| 414 const StringCanonicalizer& canonicalizer_; |
| 415 |
| 416 DISALLOW_COPY_AND_ASSIGN(SubRegionData); |
| 417 }; |
| 418 |
| 419 InputSuggester::InputSuggester(PreloadSupplier* supplier) |
| 420 : region_data_builder_(supplier) { |
| 421 } |
| 422 |
| 423 InputSuggester::~InputSuggester() { |
| 424 STLDeleteValues(&sub_regions_); |
| 425 } |
| 426 |
| 427 void InputSuggester::GetSuggestions(const AddressData& user_input, |
| 428 AddressField focused_field, |
| 429 size_t suggestions_limit, |
| 430 std::vector<AddressData>* suggestions) { |
| 431 DCHECK(suggestions); |
| 432 DCHECK(focused_field == POSTAL_CODE || |
| 433 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY)); |
| 434 |
| 435 // Do not suggest anything if the user input is empty. |
| 436 if (user_input.IsFieldEmpty(focused_field)) |
| 437 return; |
| 438 |
| 439 // Lazily initialize the mapping from fields to Trie objects. |
| 440 std::string unused_best_language; |
| 441 const RegionData& region_data = region_data_builder_.Build( |
| 442 user_input.region_code, user_input.language_code, &unused_best_language); |
| 443 |
| 444 std::map<const RegionData*, const SubRegionData*>::iterator |
| 445 sub_region_data_it = sub_regions_.find(®ion_data); |
| 446 if (sub_region_data_it == sub_regions_.end()) { |
| 447 if (!canonicalizer_) { |
| 448 canonicalizer_.reset(new StringCanonicalizer); |
| 449 } |
| 450 sub_region_data_it = |
| 451 sub_regions_.insert(std::make_pair(®ion_data, |
| 452 new SubRegionData(region_data, |
| 453 *canonicalizer_))) |
| 454 .first; |
| 455 } |
| 456 DCHECK(sub_region_data_it->second); |
| 457 |
| 458 // Build the list of regions that match |user_input| when the user is typing |
| 459 // in the |focused_field|. |
| 460 ScopedVector<Suggestion> suggested_regions; |
| 461 sub_region_data_it->second->BuildSuggestions( |
| 462 user_input, focused_field, &suggested_regions); |
| 463 |
| 464 // Generate suggestions based on the regions. |
| 465 for (ScopedVector<Suggestion>::const_iterator suggestion_it = |
| 466 suggested_regions.begin(); |
| 467 suggestion_it != suggested_regions.end(); |
| 468 ++suggestion_it) { |
| 469 Suggestion* suggested_region = *suggestion_it; |
| 470 |
| 471 // Do not add more suggestions than |suggestions_limit|. |
| 472 if (suggestions->size() >= suggestions_limit) { |
| 473 suggestions->clear(); |
| 474 return; |
| 475 } |
| 476 |
| 477 AddressData address; |
| 478 address.region_code = user_input.region_code; |
| 479 address.postal_code = user_input.postal_code; |
| 480 |
| 481 // Traverse the tree of regions from the most specific |region| to the |
| 482 // country-wide "root" of the tree. Use the region names found at each of |
| 483 // the levels of the ruleset tree to build the |suggestion|. |
| 484 AddressField address_field = suggested_region->matching_address_field; |
| 485 for (const RegionData* region = suggested_region->region_to_suggest; |
| 486 region->has_parent(); |
| 487 region = ®ion->parent()) { |
| 488 address.SetFieldValue(address_field, |
| 489 suggested_region->region_key_matches |
| 490 ? region->key() |
| 491 : region->name()); |
| 492 address_field = static_cast<AddressField>(address_field - 1); |
| 493 } |
| 494 |
| 495 suggestions->push_back(address); |
| 496 } |
| 497 } |
| 498 |
| 499 } // namespace autofill |
OLD | NEW |