OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "third_party/libaddressinput/chromium/suggestions.h" |
| 6 |
| 7 // Use "base/memory/scoped_ptr.h" instead. |
| 8 #define I18N_ADDRESSINPUT_UTIL_SCOPED_PTR_H_ |
| 9 |
| 10 #include "base/basictypes.h" |
| 11 #include "base/logging.h" |
| 12 #include "base/memory/scoped_ptr.h" |
| 13 #include "third_party/icu/source/common/unicode/errorcode.h" |
| 14 #include "third_party/icu/source/common/unicode/locid.h" |
| 15 #include "third_party/icu/source/common/unicode/unistr.h" |
| 16 #include "third_party/icu/source/common/unicode/utypes.h" |
| 17 #include "third_party/icu/source/i18n/unicode/coll.h" |
| 18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da
ta.h" |
| 19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su
pplier.h" |
| 20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat
a.h" |
| 21 |
| 22 namespace autofill { |
| 23 |
| 24 using ::i18n::addressinput::ADMIN_AREA; |
| 25 using ::i18n::addressinput::DEPENDENT_LOCALITY; |
| 26 using ::i18n::addressinput::RegionData; |
| 27 |
| 28 typedef std::set<const RegionData*> RegionContainer; |
| 29 |
| 30 namespace { |
| 31 |
| 32 // Collects regions based on whether they have a parent in the given list. |
| 33 class ParentedRegionCollector { |
| 34 public: |
| 35 // Retains a reference to both of the parameters. Does not make a copy of |
| 36 // |parent_regions|. Does not take ownership of |regions_with_parents|. The |
| 37 // |regions_with_parents| parameter should not be NULL. |
| 38 ParentedRegionCollector(const RegionContainer& parent_regions, |
| 39 RegionContainer* regions_with_parents) |
| 40 : parent_regions_(parent_regions), |
| 41 regions_with_parents_(regions_with_parents) { |
| 42 DCHECK(regions_with_parents_); |
| 43 } |
| 44 |
| 45 ~ParentedRegionCollector() {} |
| 46 |
| 47 // Adds |region_to_test| to the |regions_with_parents_| collection, if the |
| 48 // given region has a parent in |parent_regions_|. The |region_to_test| |
| 49 // parameter should not be NULL. |
| 50 void operator()(const RegionData* region_to_test) { |
| 51 DCHECK(region_to_test); |
| 52 if (parent_regions_.find(®ion_to_test->parent()) != |
| 53 parent_regions_.end()) { |
| 54 regions_with_parents_->insert(region_to_test); |
| 55 } |
| 56 } |
| 57 |
| 58 private: |
| 59 const RegionContainer parent_regions_; |
| 60 RegionContainer* regions_with_parents_; |
| 61 }; |
| 62 |
| 63 } // namespace |
| 64 |
| 65 class Suggestions::CanonicalizerImpl { |
| 66 public: |
| 67 CanonicalizerImpl() { |
| 68 UErrorCode error_code = U_ZERO_ERROR; |
| 69 collator_.reset( |
| 70 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); |
| 71 DCHECK(U_SUCCESS(error_code)); |
| 72 collator_->setStrength(icu::Collator::PRIMARY); |
| 73 } |
| 74 |
| 75 ~CanonicalizerImpl() {} |
| 76 |
| 77 // Returns a canonical version of the string that can be used for comparing |
| 78 // strings regardless of diacritics and capitalization. |
| 79 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); |
| 80 // CanonicalizeString("Texas") == CanonicalizeString("teXas"); |
| 81 // CanonicalizeString("Texas") != CanonicalizeString("California"); |
| 82 // |
| 83 // The output is not human-readable. |
| 84 // CanonicalizeString("Texas") != "Texas"; |
| 85 std::string CanonicalizeString(const std::string& original) { |
| 86 icu::UnicodeString icu_str( |
| 87 original.c_str(), static_cast<int32_t>(original.length())); |
| 88 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); |
| 89 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); |
| 90 DCHECK(buffer.get()); |
| 91 int32_t filled_size = |
| 92 collator_->getSortKey(icu_str, buffer.get(), buffer_size); |
| 93 DCHECK_EQ(buffer_size, filled_size); |
| 94 return std::string(reinterpret_cast<const char*>(buffer.get())); |
| 95 } |
| 96 |
| 97 private: |
| 98 scoped_ptr<icu::Collator> collator_; |
| 99 |
| 100 DISALLOW_COPY_AND_ASSIGN(CanonicalizerImpl); |
| 101 }; |
| 102 |
| 103 Suggestions::Suggestions(const PreloadSupplier* supplier) |
| 104 : supplier_(supplier), |
| 105 canonicalizer_(new CanonicalizerImpl) {} |
| 106 |
| 107 Suggestions::~Suggestions() { |
| 108 // Delete the maps and trie objects owned by |tries_| field. |
| 109 for (RegionCodeMap::const_iterator region_it = tries_.begin(); |
| 110 region_it != tries_.end(); ++region_it) { |
| 111 LanguageTagMap* lang_map = region_it->second; |
| 112 DCHECK(lang_map); |
| 113 |
| 114 for (LanguageTagMap::const_iterator lang_it = lang_map->begin(); |
| 115 lang_it != lang_map->end(); ++lang_it) { |
| 116 AddressFieldMap* field_map = lang_it->second; |
| 117 DCHECK(field_map); |
| 118 |
| 119 for (AddressFieldMap::const_iterator field_it = field_map->begin(); |
| 120 field_it != field_map->end(); ++field_it) { |
| 121 RegionIdMap* id_map = field_it->second; |
| 122 DCHECK(id_map); |
| 123 |
| 124 for (RegionIdMap::const_iterator id_it = id_map->begin(); |
| 125 id_it != id_map->end(); ++id_it) { |
| 126 // The tries do not own the region objects. |
| 127 Trie<const RegionData*>* trie = id_it->second; |
| 128 delete trie; |
| 129 } |
| 130 delete id_map; |
| 131 } |
| 132 delete field_map; |
| 133 } |
| 134 delete lang_map; |
| 135 } |
| 136 } |
| 137 |
| 138 void Suggestions::GetSuggestions(const AddressData& user_input, |
| 139 AddressField focused_field, |
| 140 size_t suggestions_limit, |
| 141 std::vector<AddressData>* suggestions) { |
| 142 /* |
| 143 !!!TODO: IMPL!!! |
| 144 DCHECK(suggestions); |
| 145 |
| 146 std::map<std::string, Ruleset*>::const_iterator ruleset_it = |
| 147 rules_.find(user_input.region_code); |
| 148 |
| 149 if (ruleset_it == rules_.end()) { |
| 150 return |
| 151 loading_rules_.find(user_input.region_code) != loading_rules_.end() |
| 152 ? RULES_NOT_READY |
| 153 : RULES_UNAVAILABLE; |
| 154 } |
| 155 |
| 156 if (suggestions == NULL) { |
| 157 return SUCCESS; |
| 158 } |
| 159 suggestions->clear(); |
| 160 |
| 161 assert(ruleset_it->second != NULL); |
| 162 |
| 163 // Do not suggest anything if the user is typing in the field for which |
| 164 // there's no validation data. |
| 165 if (focused_field != POSTAL_CODE && |
| 166 (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) { |
| 167 return SUCCESS; |
| 168 } |
| 169 |
| 170 // Do not suggest anything if the user input is empty. |
| 171 if (user_input.GetFieldValue(focused_field).empty()) { |
| 172 return SUCCESS; |
| 173 } |
| 174 |
| 175 const Ruleset& country_ruleset = *ruleset_it->second; |
| 176 const Rule& country_rule = |
| 177 country_ruleset.GetLanguageCodeRule(user_input.language_code); |
| 178 |
| 179 // Do not suggest anything if the user is typing the postal code that is not |
| 180 // valid for the country. |
| 181 if (!user_input.postal_code.empty() && |
| 182 focused_field == POSTAL_CODE && |
| 183 !country_rule.GetPostalCodeFormat().empty() && |
| 184 !ValueMatchesPrefixRegex( |
| 185 user_input.postal_code, country_rule.GetPostalCodeFormat())) { |
| 186 return SUCCESS; |
| 187 } |
| 188 |
| 189 // Initialize the prefix search index lazily. |
| 190 if (!ruleset_it->second->prefix_search_index_ready()) { |
| 191 ruleset_it->second->BuildPrefixSearchIndex(); |
| 192 } |
| 193 |
| 194 if (focused_field != POSTAL_CODE && |
| 195 focused_field > country_ruleset.deepest_ruleset_level()) { |
| 196 return SUCCESS; |
| 197 } |
| 198 |
| 199 // Determine the most specific address field that can be suggested. |
| 200 AddressField suggestion_field = focused_field != POSTAL_CODE |
| 201 ? focused_field : DEPENDENT_LOCALITY; |
| 202 if (suggestion_field > country_ruleset.deepest_ruleset_level()) { |
| 203 suggestion_field = country_ruleset.deepest_ruleset_level(); |
| 204 } |
| 205 if (focused_field != POSTAL_CODE) { |
| 206 while (user_input.GetFieldValue(suggestion_field).empty() && |
| 207 suggestion_field > ADMIN_AREA) { |
| 208 suggestion_field = static_cast<AddressField>(suggestion_field - 1); |
| 209 } |
| 210 } |
| 211 |
| 212 // Find all rulesets that match user input. |
| 213 AddressFieldRulesets rulesets; |
| 214 for (int i = ADMIN_AREA; i <= suggestion_field; ++i) { |
| 215 for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) { |
| 216 AddressField address_field = static_cast<AddressField>(i); |
| 217 Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j); |
| 218 |
| 219 // Find all rulesets at |address_field| level whose |rule_field| starts |
| 220 // with user input value. |
| 221 country_ruleset.FindRulesetsByPrefix( |
| 222 user_input.language_code, address_field, rule_field, |
| 223 user_input.GetFieldValue(address_field), |
| 224 &rulesets[address_field][rule_field]); |
| 225 |
| 226 // Filter out the rulesets whose parents do not match the user input. |
| 227 if (address_field > ADMIN_AREA) { |
| 228 AddressField parent_field = |
| 229 static_cast<AddressField>(address_field - 1); |
| 230 Rulesets rulesets_with_parents; |
| 231 std::for_each( |
| 232 rulesets[address_field][rule_field].begin(), |
| 233 rulesets[address_field][rule_field].end(), |
| 234 ParentedRulesetCollector(rulesets[parent_field][rule_field], |
| 235 &rulesets_with_parents)); |
| 236 rulesets[address_field][rule_field].swap(rulesets_with_parents); |
| 237 } |
| 238 } |
| 239 } |
| 240 |
| 241 // Determine the fields in the rules that match the user input. This |
| 242 // operation converts a map of Rule::IdentityField value -> Ruleset into a |
| 243 // map of Ruleset -> Rule::IdentityField bitset. |
| 244 std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets; |
| 245 for (IdentityFieldRulesets::const_iterator rule_field_it = |
| 246 rulesets[suggestion_field].begin(); |
| 247 rule_field_it != rulesets[suggestion_field].end(); |
| 248 ++rule_field_it) { |
| 249 const Rule::IdentityField rule_identity_field = rule_field_it->first; |
| 250 for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin(); |
| 251 ruleset_it != rule_field_it->second.end(); |
| 252 ++ruleset_it) { |
| 253 suggestion_rulesets[*ruleset_it].set(rule_identity_field); |
| 254 } |
| 255 } |
| 256 |
| 257 // Generate suggestions based on the rulesets. Use a Rule::IdentityField |
| 258 // from the bitset to generate address field values. |
| 259 for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator |
| 260 suggestion_it = suggestion_rulesets.begin(); |
| 261 suggestion_it != suggestion_rulesets.end(); |
| 262 ++suggestion_it) { |
| 263 const Ruleset& ruleset = *suggestion_it->first; |
| 264 const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code); |
| 265 const MatchingRuleFields& matching_rule_fields = suggestion_it->second; |
| 266 |
| 267 // Do not suggest this region if the postal code in user input does not |
| 268 // match it. |
| 269 if (!user_input.postal_code.empty() && |
| 270 !rule.GetPostalCodeFormat().empty() && |
| 271 !ValueMatchesPrefixRegex( |
| 272 user_input.postal_code, rule.GetPostalCodeFormat())) { |
| 273 continue; |
| 274 } |
| 275 |
| 276 // Do not add more suggestions than |suggestions_limit|. |
| 277 if (suggestions->size() >= suggestions_limit) { |
| 278 suggestions->clear(); |
| 279 return SUCCESS; |
| 280 } |
| 281 |
| 282 // If the user's language is not one of the supported languages of a |
| 283 // country that has latinized names for its regions, then prefer to |
| 284 // suggest the latinized region names. If the user types in local script |
| 285 // instead, then the local script names will be suggested. |
| 286 Rule::IdentityField rule_field = Rule::KEY; |
| 287 if (!country_rule.GetLanguage().empty() && |
| 288 country_rule.GetLanguage() != user_input.language_code && |
| 289 !rule.GetLatinName().empty() && |
| 290 matching_rule_fields.test(Rule::LATIN_NAME)) { |
| 291 rule_field = Rule::LATIN_NAME; |
| 292 } else if (matching_rule_fields.test(Rule::KEY)) { |
| 293 rule_field = Rule::KEY; |
| 294 } else if (matching_rule_fields.test(Rule::NAME)) { |
| 295 rule_field = Rule::NAME; |
| 296 } else if (matching_rule_fields.test(Rule::LATIN_NAME)) { |
| 297 rule_field = Rule::LATIN_NAME; |
| 298 } else { |
| 299 assert(false); |
| 300 } |
| 301 |
| 302 AddressData suggestion; |
| 303 suggestion.region_code = user_input.region_code; |
| 304 suggestion.postal_code = user_input.postal_code; |
| 305 |
| 306 // Traverse the tree of rulesets from the most specific |ruleset| to the |
| 307 // country-wide "root" of the tree. Use the region names found at each of |
| 308 // the levels of the ruleset tree to build the |suggestion|. |
| 309 for (const Ruleset* suggestion_ruleset = &ruleset; |
| 310 suggestion_ruleset->parent() != NULL; |
| 311 suggestion_ruleset = suggestion_ruleset->parent()) { |
| 312 const Rule& suggestion_rule = |
| 313 suggestion_ruleset->GetLanguageCodeRule(user_input.language_code); |
| 314 suggestion.SetFieldValue(suggestion_ruleset->field(), |
| 315 suggestion_rule.GetIdentityField(rule_field)); |
| 316 } |
| 317 |
| 318 suggestions->push_back(suggestion); |
| 319 } |
| 320 |
| 321 return SUCCESS; |
| 322 } |
| 323 |
| 324 void Ruleset::AddSubRegionRulesetsToTrie(const Ruleset& parent_ruleset) { |
| 325 assert(field_ == COUNTRY); |
| 326 assert(canonicalizer_ != NULL); |
| 327 |
| 328 for (std::map<std::string, Ruleset*>::const_iterator sub_region_it = |
| 329 parent_ruleset.sub_regions_.begin(); |
| 330 sub_region_it != parent_ruleset.sub_regions_.end(); |
| 331 ++sub_region_it) { |
| 332 const Ruleset* ruleset = sub_region_it->second; |
| 333 assert(ruleset != NULL); |
| 334 |
| 335 if (deepest_ruleset_level_ < ruleset->field()) { |
| 336 deepest_ruleset_level_ = ruleset->field(); |
| 337 } |
| 338 |
| 339 for (LanguageCodeTries::const_iterator lang_it = tries_.begin(); |
| 340 lang_it != tries_.end(); ++lang_it) { |
| 341 const std::string& language_code = lang_it->first; |
| 342 const Rule& rule = ruleset->GetLanguageCodeRule(language_code); |
| 343 |
| 344 AddressFieldTries* address_field_tries = lang_it->second; |
| 345 assert(address_field_tries != NULL); |
| 346 |
| 347 AddressFieldTries::const_iterator address_field_it = |
| 348 address_field_tries->find(ruleset->field()); |
| 349 assert(address_field_it != address_field_tries->end()); |
| 350 |
| 351 IdentityFieldTries* identity_field_tries = address_field_it->second; |
| 352 assert(identity_field_tries != NULL); |
| 353 |
| 354 IdentityFieldTries::const_iterator identity_field_it = |
| 355 identity_field_tries->find(Rule::KEY); |
| 356 assert(identity_field_it != identity_field_tries->end()); |
| 357 |
| 358 Trie<const Ruleset*>* key_trie = identity_field_it->second; |
| 359 assert(key_trie != NULL); |
| 360 |
| 361 identity_field_it = identity_field_tries->find(Rule::NAME); |
| 362 assert(identity_field_it != identity_field_tries->end()); |
| 363 |
| 364 Trie<const Ruleset*>* name_trie = identity_field_it->second; |
| 365 assert(name_trie != NULL); |
| 366 |
| 367 identity_field_it = identity_field_tries->find(Rule::LATIN_NAME); |
| 368 assert(identity_field_it != identity_field_tries->end()); |
| 369 |
| 370 Trie<const Ruleset*>* latin_name_trie = identity_field_it->second; |
| 371 assert(latin_name_trie != NULL); |
| 372 |
| 373 if (!rule.GetKey().empty()) { |
| 374 key_trie->AddDataForKey( |
| 375 canonicalizer_->CanonicalizeString(rule.GetKey()), ruleset); |
| 376 } |
| 377 |
| 378 if (!rule.GetName().empty()) { |
| 379 name_trie->AddDataForKey( |
| 380 canonicalizer_->CanonicalizeString(rule.GetName()), ruleset); |
| 381 } |
| 382 |
| 383 if (!rule.GetLatinName().empty()) { |
| 384 latin_name_trie->AddDataForKey( |
| 385 canonicalizer_->CanonicalizeString(rule.GetLatinName()), ruleset); |
| 386 } |
| 387 } |
| 388 |
| 389 AddSubRegionRulesetsToTrie(*ruleset); |
| 390 } |
| 391 */ |
| 392 } |
| 393 |
| 394 void Suggestions::FindRegionsByPrefix(const std::string& region_code, |
| 395 const std::string& language_tag, |
| 396 AddressField address_field, |
| 397 RegionIdentityField region_identity_field, |
| 398 const std::string& canonicalized_prefix, |
| 399 RegionContainer* result) const { |
| 400 DCHECK_GE(address_field, ADMIN_AREA); |
| 401 DCHECK_LE(address_field, DEPENDENT_LOCALITY); |
| 402 DCHECK(result); |
| 403 |
| 404 RegionCodeMap::const_iterator region_it = tries_.find(region_code); |
| 405 if (region_it == tries_.end()) |
| 406 return; |
| 407 |
| 408 const LanguageTagMap* lang_map = region_it->second; |
| 409 DCHECK(lang_map); |
| 410 LanguageTagMap::const_iterator lang_it = lang_map->find(language_tag); |
| 411 if (lang_it == lang_map->end()) { |
| 412 if (!lang_map->empty()) |
| 413 lang_it = lang_map->begin(); |
| 414 else |
| 415 return; |
| 416 } |
| 417 |
| 418 const AddressFieldMap* field_map = lang_it->second; |
| 419 DCHECK(field_map); |
| 420 AddressFieldMap::const_iterator field_it = field_map->find(address_field); |
| 421 if (field_it == field_map->end()) |
| 422 return; |
| 423 |
| 424 const RegionIdMap* id_map = field_it->second; |
| 425 DCHECK(id_map); |
| 426 RegionIdMap::const_iterator id_it = id_map->find(region_identity_field); |
| 427 if (id_it == id_map->end()) |
| 428 return; |
| 429 |
| 430 const Trie<const RegionData*>* trie = id_it->second; |
| 431 DCHECK(trie); |
| 432 trie->FindDataForKeyPrefix(canonicalized_prefix, result); |
| 433 } |
| 434 |
| 435 } // namespace autofill |
OLD | NEW |