Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "third_party/libaddressinput/chromium/suggestions.h" | |
| 6 | |
| 7 #include <bitset> | |
| 8 #include <map> | |
| 9 #include <queue> | |
| 10 #include <set> | |
| 11 #include <utility> | |
| 12 | |
| 13 #include "base/basictypes.h" | |
| 14 #include "base/logging.h" | |
| 15 #include "base/memory/scoped_ptr.h" | |
| 16 #include "third_party/icu/source/common/unicode/errorcode.h" | |
| 17 #include "third_party/icu/source/common/unicode/locid.h" | |
| 18 #include "third_party/icu/source/common/unicode/unistr.h" | |
| 19 #include "third_party/icu/source/common/unicode/utypes.h" | |
| 20 #include "third_party/icu/source/i18n/unicode/coll.h" | |
| 21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da ta.h" | |
| 22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su pplier.h" | |
| 23 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a.h" | |
| 24 | |
| 25 namespace autofill { | |
| 26 | |
| 27 using ::i18n::addressinput::AddressData; | |
| 28 using ::i18n::addressinput::AddressField; | |
| 29 using ::i18n::addressinput::ADMIN_AREA; | |
| 30 using ::i18n::addressinput::COUNTRY; | |
| 31 using ::i18n::addressinput::DEPENDENT_LOCALITY; | |
| 32 using ::i18n::addressinput::POSTAL_CODE; | |
| 33 using ::i18n::addressinput::PreloadSupplier; | |
| 34 using ::i18n::addressinput::RegionData; | |
| 35 using ::i18n::addressinput::RegionDataBuilder; | |
| 36 | |
| 37 namespace { | |
| 38 | |
| 39 // A type to store a set of pointers to RegionData objects. | |
| 40 typedef std::set<const RegionData*> RegionContainer; | |
| 41 | |
| 42 // Collects regions based on whether they have a parent in the given list. | |
| 43 class ParentedRegionCollector { | |
| 44 public: | |
| 45 // Retains a reference to both of the parameters. Does not make a copy of | |
| 46 // |parent_regions|. Does not take ownership of |regions_with_parents|. The | |
| 47 // |regions_with_parents| parameter should not be NULL. | |
| 48 ParentedRegionCollector(const RegionContainer& parent_regions, | |
| 49 RegionContainer* regions_with_parents) | |
| 50 : parent_regions_(parent_regions), | |
| 51 regions_with_parents_(regions_with_parents) { | |
| 52 DCHECK(regions_with_parents_); | |
| 53 } | |
| 54 | |
| 55 ~ParentedRegionCollector() {} | |
| 56 | |
| 57 // Adds |region_to_test| to the |regions_with_parents_| collection, if the | |
| 58 // given region has a parent in |parent_regions_|. The |region_to_test| | |
| 59 // parameter should not be NULL. | |
| 60 void operator()(const RegionData* region_to_test) { | |
| 61 DCHECK(region_to_test); | |
| 62 if (parent_regions_.find(®ion_to_test->parent()) != | |
| 63 parent_regions_.end()) { | |
| 64 regions_with_parents_->insert(region_to_test); | |
| 65 } | |
| 66 } | |
| 67 | |
| 68 private: | |
| 69 const RegionContainer parent_regions_; | |
| 70 RegionContainer* regions_with_parents_; | |
| 71 }; | |
| 72 | |
| 73 } // namespace | |
| 74 | |
| 75 class Suggestions::CanonicalizerImpl { | |
| 76 public: | |
| 77 CanonicalizerImpl() { | |
| 78 UErrorCode error_code = U_ZERO_ERROR; | |
| 79 collator_.reset( | |
| 80 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); | |
| 81 DCHECK(U_SUCCESS(error_code)); | |
| 82 collator_->setStrength(icu::Collator::PRIMARY); | |
| 83 } | |
| 84 | |
| 85 ~CanonicalizerImpl() {} | |
| 86 | |
| 87 // Returns a canonical version of the string that can be used for comparing | |
| 88 // strings regardless of diacritics and capitalization. | |
| 89 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); | |
| 90 // CanonicalizeString("Texas") == CanonicalizeString("teXas"); | |
| 91 // CanonicalizeString("Texas") != CanonicalizeString("California"); | |
| 92 // | |
| 93 // The output is not human-readable. | |
| 94 // CanonicalizeString("Texas") != "Texas"; | |
| 95 std::string CanonicalizeString(const std::string& original) { | |
| 96 icu::UnicodeString icu_str( | |
| 97 original.c_str(), static_cast<int32_t>(original.length())); | |
| 98 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); | |
| 99 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); | |
| 100 DCHECK(buffer.get()); | |
| 101 int32_t filled_size = | |
| 102 collator_->getSortKey(icu_str, buffer.get(), buffer_size); | |
| 103 DCHECK_EQ(buffer_size, filled_size); | |
| 104 return std::string(reinterpret_cast<const char*>(buffer.get())); | |
| 105 } | |
| 106 | |
| 107 private: | |
| 108 scoped_ptr<icu::Collator> collator_; | |
| 109 | |
| 110 DISALLOW_COPY_AND_ASSIGN(CanonicalizerImpl); | |
| 111 }; | |
| 112 | |
| 113 Suggestions::Suggestions(PreloadSupplier* supplier) | |
| 114 : region_data_builder_(supplier), | |
| 115 canonicalizer_(new CanonicalizerImpl) {} | |
| 116 | |
| 117 Suggestions::~Suggestions() { | |
| 118 // Delete the maps and Trie objects owned by |tries_| field. | |
| 119 for (RegionMap::const_iterator region_it = tries_.begin(); | |
|
please use gerrit instead
2014/06/05 22:22:49
Investigate: can we put RegionMap into its own str
| |
| 120 region_it != tries_.end(); ++region_it) { | |
| 121 FieldMap* field_map = region_it->second; | |
| 122 DCHECK(field_map); | |
| 123 | |
| 124 for (FieldMap::const_iterator field_it = field_map->begin(); | |
| 125 field_it != field_map->end(); ++field_it) { | |
| 126 IdMap* id_map = field_it->second; | |
| 127 DCHECK(id_map); | |
| 128 | |
| 129 for (IdMap::const_iterator id_it = id_map->begin(); | |
| 130 id_it != id_map->end(); ++id_it) { | |
| 131 RegionDataTrie* trie = id_it->second; | |
| 132 delete trie; | |
| 133 } | |
| 134 | |
| 135 delete id_map; | |
| 136 } | |
| 137 | |
| 138 delete field_map; | |
| 139 } | |
| 140 } | |
| 141 | |
| 142 void Suggestions::GetSuggestions(const AddressData& user_input, | |
| 143 AddressField focused_field, | |
| 144 size_t suggestions_limit, | |
| 145 std::vector<AddressData>* suggestions) { | |
| 146 DCHECK(suggestions); | |
| 147 suggestions->clear(); | |
| 148 | |
| 149 // Do not suggest anything if the user is typing in the field for which | |
| 150 // there's no suggestion data. | |
| 151 if (focused_field != POSTAL_CODE && | |
|
please use gerrit instead
2014/06/05 22:22:49
Remove special handling for POSTAL CODE!
please use gerrit instead
2014/06/09 23:28:17
Simply removing handling of postal code will break
| |
| 152 (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) { | |
| 153 return; | |
| 154 } | |
| 155 | |
| 156 // Do not suggest anything if the user input is empty. | |
| 157 if (user_input.IsFieldEmpty(focused_field)) | |
| 158 return; | |
| 159 | |
| 160 // Lazily initialize the mapping from fields to Trie objects. | |
| 161 const FieldMap& field_map = GetTries(user_input); | |
| 162 | |
| 163 // Do not suggest anything if there's no suggestion data for any fields. | |
| 164 if (field_map.empty()) | |
| 165 return; | |
| 166 | |
| 167 // Do not suggest anything if there's no suggestion data for the focused | |
| 168 // field. | |
| 169 if (focused_field != POSTAL_CODE && | |
| 170 field_map.find(focused_field) == field_map.end()) { | |
| 171 return; | |
| 172 } | |
| 173 | |
| 174 // Calculate the most specific field with suggestion data. | |
| 175 AddressField deepest_field_with_data = DEPENDENT_LOCALITY; | |
| 176 while (field_map.find(deepest_field_with_data) == field_map.end()) { | |
| 177 deepest_field_with_data | |
| 178 = static_cast<AddressField>(deepest_field_with_data - 1); | |
| 179 } | |
| 180 | |
| 181 // Determine the most specific address field that can be suggested. | |
| 182 AddressField most_specific_field = focused_field != POSTAL_CODE | |
| 183 ? focused_field : DEPENDENT_LOCALITY; | |
| 184 if (most_specific_field > deepest_field_with_data) { | |
| 185 most_specific_field = deepest_field_with_data; | |
| 186 } | |
| 187 if (focused_field != POSTAL_CODE) { | |
| 188 while (user_input.GetFieldValue(most_specific_field).empty() && | |
| 189 most_specific_field > ADMIN_AREA) { | |
| 190 most_specific_field = static_cast<AddressField>(most_specific_field - 1); | |
| 191 } | |
| 192 } | |
| 193 | |
| 194 // The regions that match the most precise fields in the address. (Locality is | |
| 195 // more precise than administrative area, for example.) | |
| 196 typedef std::bitset<REGION_IDENTITY_FIELDS_SIZE> MatchingIds; | |
| 197 typedef std::map<const RegionData*, MatchingIds> RegionIds; | |
| 198 scoped_ptr<RegionIds> precise_regions; | |
|
please use gerrit instead
2014/06/05 22:22:49
Use more data structures that hide the complexity
| |
| 199 | |
| 200 { // Begin lifetime of |suggestion_regions|. | |
| 201 // The regions that match all fields in the address. | |
| 202 typedef std::map<AddressField, RegionIds*> FieldRegionIds; | |
| 203 FieldRegionIds suggestion_regions; | |
| 204 AddressField suggestion_field = ADMIN_AREA; | |
| 205 | |
| 206 { // Begin lifetime of |matching_regions|. | |
| 207 // All regions that match at least one field in the address. | |
| 208 typedef std::map<RegionIdentityField, RegionContainer*> IdRegionContainer; | |
| 209 typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer; | |
| 210 FieldRegionContainer matching_regions; | |
| 211 | |
| 212 // Find all regions that match user input. | |
| 213 IdRegionContainer* parent_id_regions = NULL; | |
| 214 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { | |
| 215 AddressField address_field = static_cast<AddressField>(i); | |
| 216 | |
| 217 const std::string& canonical_user_input = | |
| 218 canonicalizer_->CanonicalizeString( | |
| 219 user_input.GetFieldValue(address_field)); | |
| 220 | |
| 221 IdRegionContainer* id_regions = new IdRegionContainer; | |
| 222 matching_regions.insert(std::make_pair(address_field, id_regions)); | |
| 223 | |
| 224 FieldMap::const_iterator field_map_it = field_map.find(address_field); | |
| 225 DCHECK(field_map_it != field_map.end()); | |
| 226 const IdMap* id_map = field_map_it->second; | |
| 227 DCHECK(id_map); | |
| 228 | |
| 229 for (IdMap::const_iterator id_map_it = id_map->begin(); | |
| 230 id_map_it != id_map->end(); ++id_map_it) { | |
| 231 RegionIdentityField id = id_map_it->first; | |
| 232 DCHECK_LT(id, REGION_IDENTITY_FIELDS_SIZE); | |
| 233 | |
| 234 RegionContainer* regions = new RegionContainer; | |
| 235 id_regions->insert(std::make_pair(id, regions)); | |
| 236 | |
| 237 const RegionDataTrie* trie = id_map_it->second; | |
| 238 DCHECK(trie); | |
| 239 | |
| 240 trie->FindDataForKeyPrefix(canonical_user_input, regions); | |
| 241 | |
| 242 // Filter out the regions whose parents do not match the user input. | |
| 243 if (address_field > ADMIN_AREA) { | |
| 244 DCHECK(parent_id_regions); | |
| 245 | |
| 246 IdRegionContainer::const_iterator parent_id_regions_it = | |
| 247 parent_id_regions->find(id); | |
| 248 DCHECK(parent_id_regions_it != parent_id_regions->end()); | |
| 249 | |
| 250 RegionContainer* parent_regions = parent_id_regions_it->second; | |
| 251 DCHECK(parent_regions); | |
| 252 | |
| 253 RegionContainer regions_with_parents; | |
| 254 std::for_each(regions->begin(), regions->end(), | |
| 255 ParentedRegionCollector( | |
| 256 *parent_regions, ®ions_with_parents)); | |
| 257 regions->swap(regions_with_parents); | |
| 258 } | |
| 259 } | |
| 260 | |
| 261 parent_id_regions = id_regions; | |
| 262 } | |
| 263 | |
| 264 // Collect the RegionIdentityField into a single bitmask per RegionData. | |
| 265 for (FieldRegionContainer::const_iterator field_it = | |
| 266 matching_regions.begin(); | |
| 267 field_it != matching_regions.end(); ++field_it) { | |
| 268 AddressField address_field = field_it->first; | |
| 269 const IdRegionContainer* id_regions = field_it->second; | |
| 270 DCHECK(id_regions); | |
| 271 | |
| 272 for (IdRegionContainer::const_iterator id_it = id_regions->begin(); | |
| 273 id_it != id_regions->end(); ++id_it) { | |
| 274 RegionIdentityField id = id_it->first; | |
| 275 const RegionContainer* regions = id_it->second; | |
| 276 DCHECK(regions); | |
| 277 | |
| 278 for (RegionContainer::const_iterator region_it = regions->begin(); | |
| 279 region_it != regions->end(); ++region_it) { | |
| 280 const RegionData* region = *region_it; | |
| 281 DCHECK(region); | |
| 282 | |
| 283 FieldRegionIds::iterator field_region_id_it = | |
| 284 suggestion_regions.find(address_field); | |
| 285 if (field_region_id_it == suggestion_regions.end()) { | |
| 286 field_region_id_it = suggestion_regions.insert( | |
| 287 std::make_pair(address_field, new RegionIds)).first; | |
| 288 } | |
| 289 DCHECK(field_region_id_it->second); | |
| 290 | |
| 291 RegionIds& region_ids = *(field_region_id_it->second); | |
| 292 region_ids[region].set(id); | |
| 293 | |
| 294 if (suggestion_field < address_field) | |
| 295 suggestion_field = address_field; | |
| 296 } | |
| 297 } | |
| 298 } | |
| 299 | |
| 300 // Clean up |matching_regions|. | |
| 301 for (FieldRegionContainer::const_iterator field_it = | |
| 302 matching_regions.begin(); | |
| 303 field_it != matching_regions.end(); ++field_it) { | |
| 304 IdRegionContainer* id_regions = field_it->second; | |
| 305 DCHECK(id_regions); | |
| 306 for (IdRegionContainer::const_iterator id_it = id_regions->begin(); | |
| 307 id_it != id_regions->end(); ++id_it) { | |
| 308 RegionContainer* regions = id_it->second; | |
| 309 delete regions; | |
| 310 } | |
| 311 delete id_regions; | |
| 312 } | |
| 313 } // End lifetime of |matching_regions|. | |
| 314 | |
| 315 // Filter out the less precise regions. | |
| 316 FieldRegionIds::const_iterator suggestion_regions_it = | |
| 317 suggestion_regions.find(suggestion_field); | |
| 318 if (suggestion_regions_it == suggestion_regions.end()) | |
| 319 return; | |
| 320 | |
| 321 precise_regions.reset(suggestion_regions_it->second); | |
| 322 DCHECK(precise_regions); | |
| 323 | |
| 324 for (FieldRegionIds::iterator field_region_id_it = | |
| 325 suggestion_regions.begin(); | |
| 326 field_region_id_it != suggestion_regions.end(); | |
| 327 ++field_region_id_it) { | |
| 328 if (precise_regions.get() != field_region_id_it->second) | |
| 329 delete field_region_id_it->second; | |
| 330 } | |
| 331 } // End lifetime of |suggestion_regions|. | |
| 332 | |
| 333 // Generate suggestions based on the regions. Use a RegionIdentityField from | |
| 334 // the bitset to generate address field values. | |
| 335 DCHECK(precise_regions); | |
| 336 for (RegionIds::const_iterator suggestion_it = precise_regions->begin(); | |
| 337 suggestion_it != precise_regions->end(); ++suggestion_it) { | |
| 338 // Do not add more suggestions than |suggestions_limit|. | |
| 339 if (suggestions->size() >= suggestions_limit) { | |
| 340 suggestions->clear(); | |
| 341 return; | |
| 342 } | |
| 343 | |
| 344 const MatchingIds& matching_ids = suggestion_it->second; | |
| 345 RegionIdentityField id = KEY; | |
| 346 if (matching_ids.test(KEY)) { | |
| 347 id = KEY; | |
| 348 } else if (matching_ids.test(NAME)) { | |
| 349 id = NAME; | |
| 350 } else { | |
| 351 NOTREACHED(); | |
| 352 } | |
| 353 | |
| 354 AddressData suggestion; | |
| 355 suggestion.region_code = user_input.region_code; | |
| 356 suggestion.postal_code = user_input.postal_code; | |
| 357 | |
| 358 // Traverse the tree of regions from the most specific |region| to the | |
| 359 // country-wide "root" of the tree. Use the region names found at each of | |
| 360 // the levels of the ruleset tree to build the |suggestion|. | |
| 361 std::vector<const RegionData*> suggestion_fields; | |
| 362 DCHECK(suggestion_it->first); | |
| 363 for (const RegionData* suggestion_region = suggestion_it->first; | |
| 364 suggestion_region->has_parent(); | |
| 365 suggestion_region = &suggestion_region->parent()) { | |
| 366 suggestion_fields.insert(suggestion_fields.begin(), suggestion_region); | |
| 367 } | |
| 368 AddressField address_field = ADMIN_AREA; | |
| 369 for (std::vector<const RegionData*>::const_iterator field_it = | |
| 370 suggestion_fields.begin(); | |
| 371 field_it != suggestion_fields.end(); ++field_it) { | |
| 372 suggestion.SetFieldValue( | |
| 373 address_field, | |
| 374 id == KEY ? (*field_it)->key() : (*field_it)->name()); | |
| 375 address_field = static_cast<AddressField>(address_field + 1); | |
| 376 } | |
| 377 | |
| 378 suggestions->push_back(suggestion); | |
| 379 } | |
| 380 } | |
| 381 | |
| 382 void Suggestions::AddTriesForSubRegionsOf(const RegionData& parent, | |
| 383 AddressField parent_field, | |
| 384 FieldMap* field_map) { | |
| 385 DCHECK(!parent.sub_regions().empty()); | |
| 386 DCHECK(field_map); | |
| 387 | |
| 388 AddressField field = static_cast<AddressField>(parent_field + 1); | |
| 389 DCHECK_GE(field, ADMIN_AREA); | |
| 390 DCHECK_LE(field, DEPENDENT_LOCALITY); | |
| 391 | |
| 392 FieldMap::iterator field_it = field_map->find(field); | |
| 393 if (field_it == field_map->end()) | |
| 394 field_it = field_map->insert(std::make_pair(field, new IdMap)).first; | |
| 395 | |
| 396 IdMap* id_map = field_it->second; | |
| 397 DCHECK(id_map); | |
| 398 | |
| 399 IdMap::iterator key_it = id_map->find(KEY); | |
| 400 if (key_it == id_map->end()) | |
| 401 key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first; | |
| 402 RegionDataTrie* key_trie = key_it->second; | |
| 403 DCHECK(key_trie); | |
| 404 | |
| 405 IdMap::iterator name_it = id_map->find(NAME); | |
| 406 if (name_it == id_map->end()) | |
| 407 name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first; | |
| 408 RegionDataTrie* name_trie = name_it->second; | |
| 409 DCHECK(name_trie); | |
| 410 | |
| 411 for (std::vector<const RegionData*>::const_iterator region_it = | |
| 412 parent.sub_regions().begin(); | |
| 413 region_it != parent.sub_regions().end(); ++region_it) { | |
| 414 const RegionData* region = *region_it; | |
| 415 DCHECK(region); | |
| 416 | |
| 417 key_trie->AddDataForKey( | |
| 418 canonicalizer_->CanonicalizeString(region->key()), region); | |
| 419 | |
| 420 if (!region->name().empty()) { | |
| 421 name_trie->AddDataForKey( | |
| 422 canonicalizer_->CanonicalizeString(region->name()), region); | |
| 423 } | |
| 424 | |
| 425 if (!region->sub_regions().empty()) | |
| 426 AddTriesForSubRegionsOf(*region, field, field_map); | |
| 427 } | |
| 428 } | |
| 429 | |
| 430 const Suggestions::FieldMap& Suggestions::GetTries( | |
| 431 const AddressData& user_input) { | |
| 432 std::string unused_best_region_tree_language_tag; | |
| 433 const RegionData& region_data = region_data_builder_.Build( | |
| 434 user_input.region_code, user_input.language_code, | |
| 435 &unused_best_region_tree_language_tag); | |
| 436 | |
| 437 RegionMap::iterator tries_it = tries_.find(®ion_data); | |
| 438 if (tries_it == tries_.end()) { | |
| 439 tries_it = tries_.insert(std::make_pair(®ion_data, new FieldMap)).first; | |
| 440 if (!region_data.sub_regions().empty()) | |
| 441 AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second); | |
| 442 } | |
| 443 | |
| 444 return *(tries_it->second); | |
| 445 } | |
| 446 | |
| 447 } // namespace autofill | |
| OLD | NEW |