OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "third_party/libaddressinput/chromium/suggestions.h" |
| 6 |
| 7 #include <map> |
| 8 #include <set> |
| 9 #include <utility> |
| 10 |
| 11 #include "base/basictypes.h" |
| 12 #include "base/logging.h" |
| 13 #include "base/memory/scoped_ptr.h" |
| 14 #include "third_party/icu/source/common/unicode/errorcode.h" |
| 15 #include "third_party/icu/source/common/unicode/locid.h" |
| 16 #include "third_party/icu/source/common/unicode/unistr.h" |
| 17 #include "third_party/icu/source/common/unicode/utypes.h" |
| 18 #include "third_party/icu/source/i18n/unicode/coll.h" |
| 19 #include "third_party/libaddressinput/chromium/trie.h" |
| 20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da
ta.h" |
| 21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su
pplier.h" |
| 22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat
a.h" |
| 23 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat
a_builder.h" |
| 24 |
| 25 namespace autofill { |
| 26 |
| 27 using ::i18n::addressinput::AddressData; |
| 28 using ::i18n::addressinput::AddressField; |
| 29 using ::i18n::addressinput::ADMIN_AREA; |
| 30 using ::i18n::addressinput::COUNTRY; |
| 31 using ::i18n::addressinput::DEPENDENT_LOCALITY; |
| 32 using ::i18n::addressinput::POSTAL_CODE; |
| 33 using ::i18n::addressinput::PreloadSupplier; |
| 34 using ::i18n::addressinput::RegionData; |
| 35 using ::i18n::addressinput::RegionDataBuilder; |
| 36 |
| 37 namespace { |
| 38 |
| 39 // The types of fields that describe a region. |
| 40 enum RegionField { |
| 41 KEY, |
| 42 NAME, |
| 43 }; |
| 44 |
| 45 // A bitset used for RegionField enum. |
| 46 typedef int Bitset; |
| 47 |
| 48 // A type to store a set of pointers to RegionData objects. |
| 49 typedef std::set<const RegionData*> RegionContainer; |
| 50 |
| 51 // Returns an empty bitset. |
| 52 Bitset EmptyBitset() { |
| 53 return 0; |
| 54 } |
| 55 |
| 56 // Returns bitset with |bit| already set. |
| 57 Bitset BitsetFromBit(int bit) { |
| 58 return 1 << bit; |
| 59 } |
| 60 |
| 61 // Sets the |bit| on |bitset|. |
| 62 Bitset SetBit(Bitset bitset, int bit) { |
| 63 return bitset | (1 << bit); |
| 64 } |
| 65 |
| 66 // Returns true if |bit| is set in the |bitset|. |
| 67 bool TestBit(Bitset bitset, int bit) { |
| 68 return !!(bitset & (1 << bit)); |
| 69 } |
| 70 |
| 71 Bitset AddBitsets(Bitset a, Bitset b) { |
| 72 return a | b; |
| 73 } |
| 74 |
| 75 // If |KEY| bit is set in the |value| bitset, then returns the key of the |
| 76 // region. If |NAME| bit is set in the |value| bitset, then returns the name of |
| 77 // the region. The |value| bitset should not be empty. |
| 78 const std::string& GetValueFromRegion(const RegionData* region, Bitset value) { |
| 79 DCHECK(region); |
| 80 |
| 81 if (TestBit(value, KEY)) |
| 82 return region->key(); |
| 83 else if (TestBit(value, NAME)) |
| 84 return region->name(); |
| 85 else |
| 86 NOTREACHED(); |
| 87 |
| 88 return region->key(); |
| 89 } |
| 90 |
| 91 // Collects regions based on whether they have a parent in the given list. |
| 92 class ParentedRegionCollector { |
| 93 public: |
| 94 // Retains a reference to both of the parameters. Does not make a copy of |
| 95 // |parent_regions|. Does not take ownership of |regions_with_parents|. The |
| 96 // |regions_with_parents| parameter should not be NULL. |
| 97 ParentedRegionCollector(const RegionContainer& parent_regions, |
| 98 RegionContainer* regions_with_parents) |
| 99 : parent_regions_(parent_regions), |
| 100 regions_with_parents_(regions_with_parents) { |
| 101 DCHECK(regions_with_parents_); |
| 102 } |
| 103 |
| 104 ~ParentedRegionCollector() {} |
| 105 |
| 106 // Adds |region_to_test| to the |regions_with_parents_| collection, if the |
| 107 // given region has a parent in |parent_regions_|. The |region_to_test| |
| 108 // parameter should not be NULL. |
| 109 void operator()(const RegionData* region_to_test) { |
| 110 DCHECK(region_to_test); |
| 111 if (parent_regions_.find(®ion_to_test->parent()) != |
| 112 parent_regions_.end()) { |
| 113 regions_with_parents_->insert(region_to_test); |
| 114 } |
| 115 } |
| 116 |
| 117 private: |
| 118 const RegionContainer& parent_regions_; |
| 119 RegionContainer* regions_with_parents_; |
| 120 }; |
| 121 |
| 122 // A region and its metadata useful for constructing a suggestion. |
| 123 struct SuggestionRegion { |
| 124 SuggestionRegion(const RegionData* region, |
| 125 Bitset region_fields, |
| 126 AddressField address_field) |
| 127 : region(region), |
| 128 region_fields(region_fields), |
| 129 address_field(address_field) {} |
| 130 |
| 131 ~SuggestionRegion() {} |
| 132 |
| 133 // Not owned region data. |
| 134 const RegionData* region; |
| 135 |
| 136 // A bitset of region fields of the |region|. |
| 137 Bitset region_fields; |
| 138 |
| 139 // The address field of the |region|. |
| 140 AddressField address_field; |
| 141 }; |
| 142 |
| 143 // Removes duplicates based on RegionData and sums up the region field bitsets |
| 144 // of the duplicates. |
| 145 void RemoveDuplicateRegions(std::vector<SuggestionRegion>* regions) { |
| 146 DCHECK(regions); |
| 147 |
| 148 std::map<const RegionData*, SuggestionRegion*> uniques; |
| 149 for (std::vector<SuggestionRegion>::iterator region_it = regions->begin(); |
| 150 region_it != regions->end(); ++region_it) { |
| 151 SuggestionRegion* lookup = &(*region_it); |
| 152 std::map<const RegionData*, SuggestionRegion*>::iterator unique_it = |
| 153 uniques.find(lookup->region); |
| 154 if (unique_it == uniques.end()) |
| 155 unique_it = uniques.insert(std::make_pair(lookup->region, lookup)).first; |
| 156 SuggestionRegion* target = unique_it->second; |
| 157 target->region_fields = |
| 158 AddBitsets(target->region_fields, lookup->region_fields); |
| 159 } |
| 160 |
| 161 std::vector<SuggestionRegion> result; |
| 162 for (std::map<const RegionData*, SuggestionRegion*>::const_iterator |
| 163 unique_it = uniques.begin(); |
| 164 unique_it != uniques.end(); ++unique_it) { |
| 165 result.push_back(*unique_it->second); |
| 166 } |
| 167 regions->swap(result); |
| 168 } |
| 169 |
| 170 } // namespace |
| 171 |
| 172 // Builds and caches tries from region data. |
| 173 class Suggestions::TrieCollection { |
| 174 public: |
| 175 TrieCollection(PreloadSupplier* supplier) |
| 176 : region_data_builder_(supplier) { |
| 177 UErrorCode error_code = U_ZERO_ERROR; |
| 178 collator_.reset( |
| 179 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); |
| 180 DCHECK(U_SUCCESS(error_code)); |
| 181 collator_->setStrength(icu::Collator::PRIMARY); |
| 182 } |
| 183 |
| 184 ~TrieCollection() { |
| 185 // Delete the maps and Trie objects owned by |tries_| field. |
| 186 for (RegionMap::const_iterator region_it = tries_.begin(); |
| 187 region_it != tries_.end(); ++region_it) { |
| 188 FieldMap* field_map = region_it->second; |
| 189 DCHECK(field_map); |
| 190 |
| 191 for (FieldMap::const_iterator field_it = field_map->begin(); |
| 192 field_it != field_map->end(); ++field_it) { |
| 193 IdMap* id_map = field_it->second; |
| 194 DCHECK(id_map); |
| 195 |
| 196 for (IdMap::const_iterator id_it = id_map->begin(); |
| 197 id_it != id_map->end(); ++id_it) { |
| 198 RegionDataTrie* trie = id_it->second; |
| 199 delete trie; |
| 200 } |
| 201 |
| 202 delete id_map; |
| 203 } |
| 204 |
| 205 delete field_map; |
| 206 } |
| 207 } |
| 208 |
| 209 // Builds a list of regions that can be suggested as completions for |
| 210 // |user_input| when the user is typing into |focused_field|. |
| 211 std::vector<SuggestionRegion> BuildSuggestionRegions( |
| 212 const AddressData& user_input, |
| 213 AddressField focused_field) { |
| 214 std::vector<SuggestionRegion> result; |
| 215 |
| 216 // Lazily initialize the mapping from fields to Trie objects. |
| 217 std::string unused_best_language; |
| 218 const RegionData& region_data = region_data_builder_.Build( |
| 219 user_input.region_code, user_input.language_code, |
| 220 &unused_best_language); |
| 221 RegionMap::iterator tries_it = tries_.find(®ion_data); |
| 222 if (tries_it == tries_.end()) { |
| 223 tries_it = tries_.insert( |
| 224 std::make_pair(®ion_data, new FieldMap)).first; |
| 225 if (!region_data.sub_regions().empty()) |
| 226 AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second); |
| 227 } |
| 228 const FieldMap& field_map = *(tries_it->second); |
| 229 |
| 230 // Do not suggest anything if there's no suggestion data for any fields. |
| 231 if (field_map.empty()) |
| 232 return result; |
| 233 |
| 234 // Do not suggest anything if there's no suggestion data for the focused |
| 235 // field. |
| 236 if (focused_field != POSTAL_CODE && |
| 237 field_map.find(focused_field) == field_map.end()) { |
| 238 return result; |
| 239 } |
| 240 |
| 241 // Calculate the most specific field with suggestion data. |
| 242 AddressField deepest_field_with_data = DEPENDENT_LOCALITY; |
| 243 while (field_map.find(deepest_field_with_data) == field_map.end()) { |
| 244 deepest_field_with_data |
| 245 = static_cast<AddressField>(deepest_field_with_data - 1); |
| 246 } |
| 247 |
| 248 // Determine the most specific address field that can be suggested. |
| 249 AddressField most_specific_field = focused_field != POSTAL_CODE |
| 250 ? focused_field : DEPENDENT_LOCALITY; |
| 251 if (most_specific_field > deepest_field_with_data) { |
| 252 most_specific_field = deepest_field_with_data; |
| 253 } |
| 254 if (focused_field != POSTAL_CODE) { |
| 255 while (user_input.GetFieldValue(most_specific_field).empty() && |
| 256 most_specific_field > ADMIN_AREA) { |
| 257 most_specific_field = |
| 258 static_cast<AddressField>(most_specific_field - 1); |
| 259 } |
| 260 } |
| 261 |
| 262 // Canonicalized user input data for lookup in the tries. |
| 263 AddressData canonical_input = user_input; |
| 264 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { |
| 265 AddressField address_field = static_cast<AddressField>(i); |
| 266 canonical_input.SetFieldValue( |
| 267 address_field, |
| 268 CanonicalizeString(user_input.GetFieldValue(address_field))); |
| 269 } |
| 270 |
| 271 AddressField suggestion_field = ADMIN_AREA; |
| 272 |
| 273 typedef std::map<RegionField, RegionContainer*> IdRegionContainer; |
| 274 typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer; |
| 275 FieldRegionContainer matching_regions; |
| 276 |
| 277 IdRegionContainer* parent_id_regions = NULL; |
| 278 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { |
| 279 AddressField address_field = static_cast<AddressField>(i); |
| 280 |
| 281 IdRegionContainer* id_regions = new IdRegionContainer; |
| 282 matching_regions.insert(std::make_pair(address_field, id_regions)); |
| 283 |
| 284 FieldMap::const_iterator field_map_it = field_map.find(address_field); |
| 285 DCHECK(field_map_it != field_map.end()); |
| 286 const IdMap* id_map = field_map_it->second; |
| 287 DCHECK(id_map); |
| 288 |
| 289 for (IdMap::const_iterator id_map_it = id_map->begin(); |
| 290 id_map_it != id_map->end(); ++id_map_it) { |
| 291 RegionField id = id_map_it->first; |
| 292 |
| 293 RegionContainer* regions = new RegionContainer; |
| 294 id_regions->insert(std::make_pair(id, regions)); |
| 295 |
| 296 const RegionDataTrie* trie = id_map_it->second; |
| 297 DCHECK(trie); |
| 298 |
| 299 trie->FindDataForKeyPrefix( |
| 300 canonical_input.GetFieldValue(address_field), regions); |
| 301 |
| 302 // Filter out the regions whose parents do not match the user input. |
| 303 if (address_field > ADMIN_AREA) { |
| 304 DCHECK(parent_id_regions); |
| 305 |
| 306 IdRegionContainer::const_iterator parent_id_regions_it = |
| 307 parent_id_regions->find(id); |
| 308 DCHECK(parent_id_regions_it != parent_id_regions->end()); |
| 309 |
| 310 RegionContainer* parent_regions = parent_id_regions_it->second; |
| 311 DCHECK(parent_regions); |
| 312 |
| 313 RegionContainer regions_with_parents; |
| 314 std::for_each(regions->begin(), regions->end(), |
| 315 ParentedRegionCollector( |
| 316 *parent_regions, ®ions_with_parents)); |
| 317 regions->swap(regions_with_parents); |
| 318 } |
| 319 |
| 320 for (RegionContainer::const_iterator region_it = regions->begin(); |
| 321 region_it != regions->end(); ++region_it) { |
| 322 if (address_field > suggestion_field) { |
| 323 suggestion_field = address_field; |
| 324 result.clear(); |
| 325 } |
| 326 result.push_back( |
| 327 SuggestionRegion(*region_it, BitsetFromBit(id), address_field)); |
| 328 } |
| 329 } |
| 330 |
| 331 parent_id_regions = id_regions; |
| 332 } |
| 333 |
| 334 // Cleanup. |
| 335 for (FieldRegionContainer::const_iterator |
| 336 field_it = matching_regions.begin(); |
| 337 field_it != matching_regions.end(); ++field_it) { |
| 338 IdRegionContainer* id_regions = field_it->second; |
| 339 DCHECK(id_regions); |
| 340 for (IdRegionContainer::const_iterator id_it = id_regions->begin(); |
| 341 id_it != id_regions->end(); ++id_it) { |
| 342 RegionContainer* regions = id_it->second; |
| 343 delete regions; |
| 344 } |
| 345 delete id_regions; |
| 346 } |
| 347 |
| 348 RemoveDuplicateRegions(&result); |
| 349 return result; |
| 350 } |
| 351 |
| 352 private: |
| 353 typedef Trie<const RegionData*> RegionDataTrie; |
| 354 typedef std::map<RegionField, RegionDataTrie*> IdMap; |
| 355 typedef std::map<AddressField, IdMap*> FieldMap; |
| 356 typedef std::map<const RegionData*, FieldMap*> RegionMap; |
| 357 |
| 358 // Recursively builds tries from subregions of |parent| and add these tries to |
| 359 // the |field_map|. |
| 360 void AddTriesForSubRegionsOf(const RegionData& parent, |
| 361 AddressField parent_field, |
| 362 FieldMap* field_map) { |
| 363 DCHECK(!parent.sub_regions().empty()); |
| 364 DCHECK(field_map); |
| 365 |
| 366 AddressField field = static_cast<AddressField>(parent_field + 1); |
| 367 DCHECK_GE(field, ADMIN_AREA); |
| 368 DCHECK_LE(field, DEPENDENT_LOCALITY); |
| 369 |
| 370 FieldMap::iterator field_it = field_map->find(field); |
| 371 if (field_it == field_map->end()) |
| 372 field_it = field_map->insert(std::make_pair(field, new IdMap)).first; |
| 373 |
| 374 IdMap* id_map = field_it->second; |
| 375 DCHECK(id_map); |
| 376 |
| 377 IdMap::iterator key_it = id_map->find(KEY); |
| 378 if (key_it == id_map->end()) |
| 379 key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first; |
| 380 RegionDataTrie* key_trie = key_it->second; |
| 381 DCHECK(key_trie); |
| 382 |
| 383 IdMap::iterator name_it = id_map->find(NAME); |
| 384 if (name_it == id_map->end()) |
| 385 name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first; |
| 386 RegionDataTrie* name_trie = name_it->second; |
| 387 DCHECK(name_trie); |
| 388 |
| 389 for (std::vector<const RegionData*>::const_iterator region_it = |
| 390 parent.sub_regions().begin(); |
| 391 region_it != parent.sub_regions().end(); ++region_it) { |
| 392 const RegionData* region = *region_it; |
| 393 DCHECK(region); |
| 394 |
| 395 key_trie->AddDataForKey(CanonicalizeString(region->key()), region); |
| 396 |
| 397 if (!region->name().empty()) { |
| 398 name_trie->AddDataForKey(CanonicalizeString(region->name()), region); |
| 399 } |
| 400 |
| 401 if (!region->sub_regions().empty()) |
| 402 AddTriesForSubRegionsOf(*region, field, field_map); |
| 403 } |
| 404 } |
| 405 |
| 406 // Returns a canonical version of the string that can be used for comparing |
| 407 // strings regardless of diacritics and capitalization. |
| 408 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); |
| 409 // CanonicalizeString("Texas") == CanonicalizeString("teXas"); |
| 410 // CanonicalizeString("Texas") != CanonicalizeString("California"); |
| 411 // |
| 412 // The output is not human-readable. |
| 413 // CanonicalizeString("Texas") != "Texas"; |
| 414 std::string CanonicalizeString(const std::string& original) const { |
| 415 icu::UnicodeString icu_str( |
| 416 original.c_str(), static_cast<int32_t>(original.length())); |
| 417 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); |
| 418 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); |
| 419 DCHECK(buffer.get()); |
| 420 int32_t filled_size = |
| 421 collator_->getSortKey(icu_str, buffer.get(), buffer_size); |
| 422 DCHECK_EQ(buffer_size, filled_size); |
| 423 return std::string(reinterpret_cast<const char*>(buffer.get())); |
| 424 } |
| 425 |
| 426 // A mapping of the country-level RegionData objects to a collection of Trie |
| 427 // objects. Tries and maps are owned. RegionDatas are not owned. |
| 428 RegionMap tries_; |
| 429 |
| 430 // Data source for region data. |
| 431 RegionDataBuilder region_data_builder_; |
| 432 |
| 433 // Canonicalizes strings for case and diacritic insensitive search. |
| 434 scoped_ptr<icu::Collator> collator_; |
| 435 |
| 436 DISALLOW_COPY_AND_ASSIGN(TrieCollection); |
| 437 }; |
| 438 |
| 439 Suggestions::Suggestions(PreloadSupplier* supplier) |
| 440 : trie_collection_(new TrieCollection(supplier)) {} |
| 441 |
| 442 Suggestions::~Suggestions() {} |
| 443 |
| 444 void Suggestions::GetSuggestions(const AddressData& user_input, |
| 445 AddressField focused_field, |
| 446 size_t suggestions_limit, |
| 447 std::vector<AddressData>* suggestions) { |
| 448 DCHECK(suggestions); |
| 449 DCHECK(focused_field == POSTAL_CODE || |
| 450 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY)); |
| 451 |
| 452 // Do not suggest anything if the user input is empty. |
| 453 if (user_input.IsFieldEmpty(focused_field)) |
| 454 return; |
| 455 |
| 456 // Build the list of regions that match |user_input| when the user is typing |
| 457 // in the |focused_field|. |
| 458 const std::vector<SuggestionRegion>& regions = |
| 459 trie_collection_->BuildSuggestionRegions(user_input, focused_field); |
| 460 |
| 461 // Generate suggestions based on the regions. |
| 462 for (std::vector<SuggestionRegion>::const_iterator |
| 463 region_it = regions.begin(); |
| 464 region_it != regions.end(); ++region_it) { |
| 465 // Do not add more suggestions than |suggestions_limit|. |
| 466 if (suggestions->size() >= suggestions_limit) { |
| 467 suggestions->clear(); |
| 468 return; |
| 469 } |
| 470 |
| 471 AddressData suggestion; |
| 472 suggestion.region_code = user_input.region_code; |
| 473 suggestion.postal_code = user_input.postal_code; |
| 474 |
| 475 // Traverse the tree of regions from the most specific |region| to the |
| 476 // country-wide "root" of the tree. Use the region names found at each of |
| 477 // the levels of the ruleset tree to build the |suggestion|. |
| 478 DCHECK(region_it->region); |
| 479 AddressField address_field = region_it->address_field; |
| 480 for (const RegionData* suggestion_region = region_it->region; |
| 481 suggestion_region->has_parent(); |
| 482 suggestion_region = &suggestion_region->parent()) { |
| 483 suggestion.SetFieldValue( |
| 484 address_field, |
| 485 GetValueFromRegion(suggestion_region, region_it->region_fields)); |
| 486 address_field = static_cast<AddressField>(address_field - 1); |
| 487 } |
| 488 |
| 489 suggestions->push_back(suggestion); |
| 490 } |
| 491 } |
| 492 |
| 493 } // namespace autofill |
OLD | NEW |