Chromium Code Reviews| Index: third_party/libaddressinput/chromium/suggestions.cc |
| diff --git a/third_party/libaddressinput/chromium/suggestions.cc b/third_party/libaddressinput/chromium/suggestions.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..e038b71c2f0aee0793847c41e56a1266a8e2f2b9 |
| --- /dev/null |
| +++ b/third_party/libaddressinput/chromium/suggestions.cc |
| @@ -0,0 +1,447 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "third_party/libaddressinput/chromium/suggestions.h" |
| + |
| +#include <bitset> |
| +#include <map> |
| +#include <queue> |
| +#include <set> |
| +#include <utility> |
| + |
| +#include "base/basictypes.h" |
| +#include "base/logging.h" |
| +#include "base/memory/scoped_ptr.h" |
| +#include "third_party/icu/source/common/unicode/errorcode.h" |
| +#include "third_party/icu/source/common/unicode/locid.h" |
| +#include "third_party/icu/source/common/unicode/unistr.h" |
| +#include "third_party/icu/source/common/unicode/utypes.h" |
| +#include "third_party/icu/source/i18n/unicode/coll.h" |
| +#include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h" |
| +#include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h" |
| +#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h" |
| + |
| +namespace autofill { |
| + |
| +using ::i18n::addressinput::AddressData; |
| +using ::i18n::addressinput::AddressField; |
| +using ::i18n::addressinput::ADMIN_AREA; |
| +using ::i18n::addressinput::COUNTRY; |
| +using ::i18n::addressinput::DEPENDENT_LOCALITY; |
| +using ::i18n::addressinput::POSTAL_CODE; |
| +using ::i18n::addressinput::PreloadSupplier; |
| +using ::i18n::addressinput::RegionData; |
| +using ::i18n::addressinput::RegionDataBuilder; |
| + |
| +namespace { |
| + |
| +// A type to store a set of pointers to RegionData objects. |
| +typedef std::set<const RegionData*> RegionContainer; |
| + |
| +// Collects regions based on whether they have a parent in the given list. |
| +class ParentedRegionCollector { |
| + public: |
| + // Retains a reference to both of the parameters. Does not make a copy of |
| + // |parent_regions|. Does not take ownership of |regions_with_parents|. The |
| + // |regions_with_parents| parameter should not be NULL. |
| + ParentedRegionCollector(const RegionContainer& parent_regions, |
| + RegionContainer* regions_with_parents) |
| + : parent_regions_(parent_regions), |
| + regions_with_parents_(regions_with_parents) { |
| + DCHECK(regions_with_parents_); |
| + } |
| + |
| + ~ParentedRegionCollector() {} |
| + |
| + // Adds |region_to_test| to the |regions_with_parents_| collection, if the |
| + // given region has a parent in |parent_regions_|. The |region_to_test| |
| + // parameter should not be NULL. |
| + void operator()(const RegionData* region_to_test) { |
| + DCHECK(region_to_test); |
| + if (parent_regions_.find(®ion_to_test->parent()) != |
| + parent_regions_.end()) { |
| + regions_with_parents_->insert(region_to_test); |
| + } |
| + } |
| + |
| + private: |
| + const RegionContainer parent_regions_; |
| + RegionContainer* regions_with_parents_; |
| +}; |
| + |
| +} // namespace |
| + |
| +class Suggestions::CanonicalizerImpl { |
| + public: |
| + CanonicalizerImpl() { |
| + UErrorCode error_code = U_ZERO_ERROR; |
| + collator_.reset( |
| + icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); |
| + DCHECK(U_SUCCESS(error_code)); |
| + collator_->setStrength(icu::Collator::PRIMARY); |
| + } |
| + |
| + ~CanonicalizerImpl() {} |
| + |
| + // Returns a canonical version of the string that can be used for comparing |
| + // strings regardless of diacritics and capitalization. |
| + // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); |
| + // CanonicalizeString("Texas") == CanonicalizeString("teXas"); |
| + // CanonicalizeString("Texas") != CanonicalizeString("California"); |
| + // |
| + // The output is not human-readable. |
| + // CanonicalizeString("Texas") != "Texas"; |
| + std::string CanonicalizeString(const std::string& original) { |
| + icu::UnicodeString icu_str( |
| + original.c_str(), static_cast<int32_t>(original.length())); |
| + int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); |
| + scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); |
| + DCHECK(buffer.get()); |
| + int32_t filled_size = |
| + collator_->getSortKey(icu_str, buffer.get(), buffer_size); |
| + DCHECK_EQ(buffer_size, filled_size); |
| + return std::string(reinterpret_cast<const char*>(buffer.get())); |
| + } |
| + |
| + private: |
| + scoped_ptr<icu::Collator> collator_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(CanonicalizerImpl); |
| +}; |
| + |
| +Suggestions::Suggestions(PreloadSupplier* supplier) |
| + : region_data_builder_(supplier), |
| + canonicalizer_(new CanonicalizerImpl) {} |
| + |
| +Suggestions::~Suggestions() { |
| + // Delete the maps and Trie objects owned by |tries_| field. |
| + for (RegionMap::const_iterator region_it = tries_.begin(); |
|
please use gerrit instead
2014/06/05 22:22:49
Investigate: can we put RegionMap into its own str
|
| + region_it != tries_.end(); ++region_it) { |
| + FieldMap* field_map = region_it->second; |
| + DCHECK(field_map); |
| + |
| + for (FieldMap::const_iterator field_it = field_map->begin(); |
| + field_it != field_map->end(); ++field_it) { |
| + IdMap* id_map = field_it->second; |
| + DCHECK(id_map); |
| + |
| + for (IdMap::const_iterator id_it = id_map->begin(); |
| + id_it != id_map->end(); ++id_it) { |
| + RegionDataTrie* trie = id_it->second; |
| + delete trie; |
| + } |
| + |
| + delete id_map; |
| + } |
| + |
| + delete field_map; |
| + } |
| +} |
| + |
| +void Suggestions::GetSuggestions(const AddressData& user_input, |
| + AddressField focused_field, |
| + size_t suggestions_limit, |
| + std::vector<AddressData>* suggestions) { |
| + DCHECK(suggestions); |
| + suggestions->clear(); |
| + |
| + // Do not suggest anything if the user is typing in the field for which |
| + // there's no suggestion data. |
| + if (focused_field != POSTAL_CODE && |
|
please use gerrit instead
2014/06/05 22:22:49
Remove special handling for POSTAL CODE!
please use gerrit instead
2014/06/09 23:28:17
Simply removing handling of postal code will break
|
| + (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) { |
| + return; |
| + } |
| + |
| + // Do not suggest anything if the user input is empty. |
| + if (user_input.IsFieldEmpty(focused_field)) |
| + return; |
| + |
| + // Lazily initialize the mapping from fields to Trie objects. |
| + const FieldMap& field_map = GetTries(user_input); |
| + |
| + // Do not suggest anything if there's no suggestion data for any fields. |
| + if (field_map.empty()) |
| + return; |
| + |
| + // Do not suggest anything if there's no suggestion data for the focused |
| + // field. |
| + if (focused_field != POSTAL_CODE && |
| + field_map.find(focused_field) == field_map.end()) { |
| + return; |
| + } |
| + |
| + // Calculate the most specific field with suggestion data. |
| + AddressField deepest_field_with_data = DEPENDENT_LOCALITY; |
| + while (field_map.find(deepest_field_with_data) == field_map.end()) { |
| + deepest_field_with_data |
| + = static_cast<AddressField>(deepest_field_with_data - 1); |
| + } |
| + |
| + // Determine the most specific address field that can be suggested. |
| + AddressField most_specific_field = focused_field != POSTAL_CODE |
| + ? focused_field : DEPENDENT_LOCALITY; |
| + if (most_specific_field > deepest_field_with_data) { |
| + most_specific_field = deepest_field_with_data; |
| + } |
| + if (focused_field != POSTAL_CODE) { |
| + while (user_input.GetFieldValue(most_specific_field).empty() && |
| + most_specific_field > ADMIN_AREA) { |
| + most_specific_field = static_cast<AddressField>(most_specific_field - 1); |
| + } |
| + } |
| + |
| + // The regions that match the most precise fields in the address. (Locality is |
| + // more precise than administrative area, for example.) |
| + typedef std::bitset<REGION_IDENTITY_FIELDS_SIZE> MatchingIds; |
| + typedef std::map<const RegionData*, MatchingIds> RegionIds; |
| + scoped_ptr<RegionIds> precise_regions; |
|
please use gerrit instead
2014/06/05 22:22:49
Use more data structures that hide the complexity
|
| + |
| + { // Begin lifetime of |suggestion_regions|. |
| + // The regions that match all fields in the address. |
| + typedef std::map<AddressField, RegionIds*> FieldRegionIds; |
| + FieldRegionIds suggestion_regions; |
| + AddressField suggestion_field = ADMIN_AREA; |
| + |
| + { // Begin lifetime of |matching_regions|. |
| + // All regions that match at least one field in the address. |
| + typedef std::map<RegionIdentityField, RegionContainer*> IdRegionContainer; |
| + typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer; |
| + FieldRegionContainer matching_regions; |
| + |
| + // Find all regions that match user input. |
| + IdRegionContainer* parent_id_regions = NULL; |
| + for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { |
| + AddressField address_field = static_cast<AddressField>(i); |
| + |
| + const std::string& canonical_user_input = |
| + canonicalizer_->CanonicalizeString( |
| + user_input.GetFieldValue(address_field)); |
| + |
| + IdRegionContainer* id_regions = new IdRegionContainer; |
| + matching_regions.insert(std::make_pair(address_field, id_regions)); |
| + |
| + FieldMap::const_iterator field_map_it = field_map.find(address_field); |
| + DCHECK(field_map_it != field_map.end()); |
| + const IdMap* id_map = field_map_it->second; |
| + DCHECK(id_map); |
| + |
| + for (IdMap::const_iterator id_map_it = id_map->begin(); |
| + id_map_it != id_map->end(); ++id_map_it) { |
| + RegionIdentityField id = id_map_it->first; |
| + DCHECK_LT(id, REGION_IDENTITY_FIELDS_SIZE); |
| + |
| + RegionContainer* regions = new RegionContainer; |
| + id_regions->insert(std::make_pair(id, regions)); |
| + |
| + const RegionDataTrie* trie = id_map_it->second; |
| + DCHECK(trie); |
| + |
| + trie->FindDataForKeyPrefix(canonical_user_input, regions); |
| + |
| + // Filter out the regions whose parents do not match the user input. |
| + if (address_field > ADMIN_AREA) { |
| + DCHECK(parent_id_regions); |
| + |
| + IdRegionContainer::const_iterator parent_id_regions_it = |
| + parent_id_regions->find(id); |
| + DCHECK(parent_id_regions_it != parent_id_regions->end()); |
| + |
| + RegionContainer* parent_regions = parent_id_regions_it->second; |
| + DCHECK(parent_regions); |
| + |
| + RegionContainer regions_with_parents; |
| + std::for_each(regions->begin(), regions->end(), |
| + ParentedRegionCollector( |
| + *parent_regions, ®ions_with_parents)); |
| + regions->swap(regions_with_parents); |
| + } |
| + } |
| + |
| + parent_id_regions = id_regions; |
| + } |
| + |
| + // Collect the RegionIdentityField into a single bitmask per RegionData. |
| + for (FieldRegionContainer::const_iterator field_it = |
| + matching_regions.begin(); |
| + field_it != matching_regions.end(); ++field_it) { |
| + AddressField address_field = field_it->first; |
| + const IdRegionContainer* id_regions = field_it->second; |
| + DCHECK(id_regions); |
| + |
| + for (IdRegionContainer::const_iterator id_it = id_regions->begin(); |
| + id_it != id_regions->end(); ++id_it) { |
| + RegionIdentityField id = id_it->first; |
| + const RegionContainer* regions = id_it->second; |
| + DCHECK(regions); |
| + |
| + for (RegionContainer::const_iterator region_it = regions->begin(); |
| + region_it != regions->end(); ++region_it) { |
| + const RegionData* region = *region_it; |
| + DCHECK(region); |
| + |
| + FieldRegionIds::iterator field_region_id_it = |
| + suggestion_regions.find(address_field); |
| + if (field_region_id_it == suggestion_regions.end()) { |
| + field_region_id_it = suggestion_regions.insert( |
| + std::make_pair(address_field, new RegionIds)).first; |
| + } |
| + DCHECK(field_region_id_it->second); |
| + |
| + RegionIds& region_ids = *(field_region_id_it->second); |
| + region_ids[region].set(id); |
| + |
| + if (suggestion_field < address_field) |
| + suggestion_field = address_field; |
| + } |
| + } |
| + } |
| + |
| + // Clean up |matching_regions|. |
| + for (FieldRegionContainer::const_iterator field_it = |
| + matching_regions.begin(); |
| + field_it != matching_regions.end(); ++field_it) { |
| + IdRegionContainer* id_regions = field_it->second; |
| + DCHECK(id_regions); |
| + for (IdRegionContainer::const_iterator id_it = id_regions->begin(); |
| + id_it != id_regions->end(); ++id_it) { |
| + RegionContainer* regions = id_it->second; |
| + delete regions; |
| + } |
| + delete id_regions; |
| + } |
| + } // End lifetime of |matching_regions|. |
| + |
| + // Filter out the less precise regions. |
| + FieldRegionIds::const_iterator suggestion_regions_it = |
| + suggestion_regions.find(suggestion_field); |
| + if (suggestion_regions_it == suggestion_regions.end()) |
| + return; |
| + |
| + precise_regions.reset(suggestion_regions_it->second); |
| + DCHECK(precise_regions); |
| + |
| + for (FieldRegionIds::iterator field_region_id_it = |
| + suggestion_regions.begin(); |
| + field_region_id_it != suggestion_regions.end(); |
| + ++field_region_id_it) { |
| + if (precise_regions.get() != field_region_id_it->second) |
| + delete field_region_id_it->second; |
| + } |
| + } // End lifetime of |suggestion_regions|. |
| + |
| + // Generate suggestions based on the regions. Use a RegionIdentityField from |
| + // the bitset to generate address field values. |
| + DCHECK(precise_regions); |
| + for (RegionIds::const_iterator suggestion_it = precise_regions->begin(); |
| + suggestion_it != precise_regions->end(); ++suggestion_it) { |
| + // Do not add more suggestions than |suggestions_limit|. |
| + if (suggestions->size() >= suggestions_limit) { |
| + suggestions->clear(); |
| + return; |
| + } |
| + |
| + const MatchingIds& matching_ids = suggestion_it->second; |
| + RegionIdentityField id = KEY; |
| + if (matching_ids.test(KEY)) { |
| + id = KEY; |
| + } else if (matching_ids.test(NAME)) { |
| + id = NAME; |
| + } else { |
| + NOTREACHED(); |
| + } |
| + |
| + AddressData suggestion; |
| + suggestion.region_code = user_input.region_code; |
| + suggestion.postal_code = user_input.postal_code; |
| + |
| + // Traverse the tree of regions from the most specific |region| to the |
| + // country-wide "root" of the tree. Use the region names found at each of |
| + // the levels of the ruleset tree to build the |suggestion|. |
| + std::vector<const RegionData*> suggestion_fields; |
| + DCHECK(suggestion_it->first); |
| + for (const RegionData* suggestion_region = suggestion_it->first; |
| + suggestion_region->has_parent(); |
| + suggestion_region = &suggestion_region->parent()) { |
| + suggestion_fields.insert(suggestion_fields.begin(), suggestion_region); |
| + } |
| + AddressField address_field = ADMIN_AREA; |
| + for (std::vector<const RegionData*>::const_iterator field_it = |
| + suggestion_fields.begin(); |
| + field_it != suggestion_fields.end(); ++field_it) { |
| + suggestion.SetFieldValue( |
| + address_field, |
| + id == KEY ? (*field_it)->key() : (*field_it)->name()); |
| + address_field = static_cast<AddressField>(address_field + 1); |
| + } |
| + |
| + suggestions->push_back(suggestion); |
| + } |
| +} |
| + |
| +void Suggestions::AddTriesForSubRegionsOf(const RegionData& parent, |
| + AddressField parent_field, |
| + FieldMap* field_map) { |
| + DCHECK(!parent.sub_regions().empty()); |
| + DCHECK(field_map); |
| + |
| + AddressField field = static_cast<AddressField>(parent_field + 1); |
| + DCHECK_GE(field, ADMIN_AREA); |
| + DCHECK_LE(field, DEPENDENT_LOCALITY); |
| + |
| + FieldMap::iterator field_it = field_map->find(field); |
| + if (field_it == field_map->end()) |
| + field_it = field_map->insert(std::make_pair(field, new IdMap)).first; |
| + |
| + IdMap* id_map = field_it->second; |
| + DCHECK(id_map); |
| + |
| + IdMap::iterator key_it = id_map->find(KEY); |
| + if (key_it == id_map->end()) |
| + key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first; |
| + RegionDataTrie* key_trie = key_it->second; |
| + DCHECK(key_trie); |
| + |
| + IdMap::iterator name_it = id_map->find(NAME); |
| + if (name_it == id_map->end()) |
| + name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first; |
| + RegionDataTrie* name_trie = name_it->second; |
| + DCHECK(name_trie); |
| + |
| + for (std::vector<const RegionData*>::const_iterator region_it = |
| + parent.sub_regions().begin(); |
| + region_it != parent.sub_regions().end(); ++region_it) { |
| + const RegionData* region = *region_it; |
| + DCHECK(region); |
| + |
| + key_trie->AddDataForKey( |
| + canonicalizer_->CanonicalizeString(region->key()), region); |
| + |
| + if (!region->name().empty()) { |
| + name_trie->AddDataForKey( |
| + canonicalizer_->CanonicalizeString(region->name()), region); |
| + } |
| + |
| + if (!region->sub_regions().empty()) |
| + AddTriesForSubRegionsOf(*region, field, field_map); |
| + } |
| +} |
| + |
| +const Suggestions::FieldMap& Suggestions::GetTries( |
| + const AddressData& user_input) { |
| + std::string unused_best_region_tree_language_tag; |
| + const RegionData& region_data = region_data_builder_.Build( |
| + user_input.region_code, user_input.language_code, |
| + &unused_best_region_tree_language_tag); |
| + |
| + RegionMap::iterator tries_it = tries_.find(®ion_data); |
| + if (tries_it == tries_.end()) { |
| + tries_it = tries_.insert(std::make_pair(®ion_data, new FieldMap)).first; |
| + if (!region_data.sub_regions().empty()) |
| + AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second); |
| + } |
| + |
| + return *(tries_it->second); |
| +} |
| + |
| +} // namespace autofill |