Index: third_party/libaddressinput/chromium/input_suggester.cc |
diff --git a/third_party/libaddressinput/chromium/input_suggester.cc b/third_party/libaddressinput/chromium/input_suggester.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..1c1f4dfd4c365759335cda9fd33bddb279933f2c |
--- /dev/null |
+++ b/third_party/libaddressinput/chromium/input_suggester.cc |
@@ -0,0 +1,484 @@ |
+// Copyright 2014 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "third_party/libaddressinput/chromium/input_suggester.h" |
+ |
+#include <map> |
+#include <set> |
+#include <utility> |
+ |
+#include "base/basictypes.h" |
+#include "base/logging.h" |
+#include "base/memory/scoped_ptr.h" |
+#include "third_party/icu/source/common/unicode/errorcode.h" |
+#include "third_party/icu/source/common/unicode/locid.h" |
+#include "third_party/icu/source/common/unicode/unistr.h" |
+#include "third_party/icu/source/common/unicode/utypes.h" |
+#include "third_party/icu/source/i18n/unicode/coll.h" |
+#include "third_party/libaddressinput/chromium/trie.h" |
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h" |
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h" |
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h" |
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data_builder.h" |
+ |
+namespace autofill { |
+ |
+using ::i18n::addressinput::AddressData; |
+using ::i18n::addressinput::AddressField; |
+using ::i18n::addressinput::ADMIN_AREA; |
+using ::i18n::addressinput::COUNTRY; |
+using ::i18n::addressinput::DEPENDENT_LOCALITY; |
+using ::i18n::addressinput::POSTAL_CODE; |
+using ::i18n::addressinput::PreloadSupplier; |
+using ::i18n::addressinput::RegionData; |
+using ::i18n::addressinput::RegionDataBuilder; |
+ |
+namespace { |
+ |
+// The types of fields that describe a region. |
+enum RegionField { |
+ KEY, |
+ NAME, |
+}; |
+ |
+// A bitset used for RegionField enum. |
+typedef int Bitset; |
+ |
+// A type to store a set of pointers to RegionData objects. |
+typedef std::set<const RegionData*> RegionContainer; |
+ |
+// Returns bitset with |bit| already set. |
+Bitset BitsetFromBit(int bit) { |
+ return 1 << bit; |
+} |
+ |
+// Returns a union of the bitests |a| and |b|. |
+Bitset AddBitsets(Bitset a, Bitset b) { |
+ return a | b; |
+} |
+ |
+// Returns true if |bit| is set in the |bitset|. |
+bool TestBit(Bitset bitset, int bit) { |
Evan Stade
2014/06/19 00:38:00
this is not the idiomatic way to deal with bitmask
please use gerrit instead
2014/06/20 21:09:15
Got rid of bitmaps all together.
|
+ return !!(bitset & (1 << bit)); |
+} |
+ |
+// If |KEY| bit is set in the |value| bitset, then returns the key of the |
+// region. If |NAME| bit is set in the |value| bitset, then returns the name of |
+// the region. The |value| bitset should not be empty. |
+const std::string& GetValueFromRegion(const RegionData* region, Bitset value) { |
+ DCHECK(region); |
+ |
+ if (TestBit(value, KEY)) |
+ return region->key(); |
+ else if (TestBit(value, NAME)) |
+ return region->name(); |
+ else |
+ NOTREACHED(); |
+ |
+ return region->key(); |
+} |
+ |
+// Collects regions based on whether they have a parent in the given list. |
+class ParentedRegionCollector { |
Evan Stade
2014/06/19 00:38:00
you created this entire class so you could use std
please use gerrit instead
2014/06/20 21:09:15
Removed and came up with a better implementation.
|
+ public: |
+ // Retains a reference to both of the parameters. Does not make a copy of |
+ // |parent_regions|. Does not take ownership of |regions_with_parents|. The |
+ // |regions_with_parents| parameter should not be NULL. |
+ ParentedRegionCollector(const RegionContainer& parent_regions, |
+ RegionContainer* regions_with_parents) |
+ : parent_regions_(parent_regions), |
+ regions_with_parents_(regions_with_parents) { |
+ DCHECK(regions_with_parents_); |
+ } |
+ |
+ ~ParentedRegionCollector() {} |
+ |
+ // Adds |region_to_test| to the |regions_with_parents_| collection, if the |
+ // given region has a parent in |parent_regions_|. The |region_to_test| |
+ // parameter should not be NULL. |
+ void operator()(const RegionData* region_to_test) { |
+ DCHECK(region_to_test); |
+ if (parent_regions_.find(®ion_to_test->parent()) != |
+ parent_regions_.end()) { |
+ regions_with_parents_->insert(region_to_test); |
+ } |
+ } |
+ |
+ private: |
+ const RegionContainer& parent_regions_; |
+ RegionContainer* regions_with_parents_; |
+}; |
+ |
+// A region and its metadata useful for constructing a suggestion. |
+struct SuggestionRegion { |
+ SuggestionRegion(const RegionData* region, |
+ Bitset region_fields, |
+ AddressField address_field) |
+ : region(region), |
+ region_fields(region_fields), |
+ address_field(address_field) {} |
+ |
+ ~SuggestionRegion() {} |
+ |
+ // Not owned region data. |
+ const RegionData* region; |
+ |
+ // A bitset of region fields of the |region|. |
+ Bitset region_fields; |
+ |
+ // The address field of the |region|. |
+ AddressField address_field; |
+}; |
+ |
+// Removes duplicates based on RegionData and sums up the region field bitsets |
+// of the duplicates. |
+void RemoveDuplicateRegions(std::vector<SuggestionRegion>* regions) { |
+ DCHECK(regions); |
+ |
+ std::map<const RegionData*, SuggestionRegion*> uniques; |
+ for (std::vector<SuggestionRegion>::iterator region_it = regions->begin(); |
+ region_it != regions->end(); ++region_it) { |
+ SuggestionRegion* lookup = &(*region_it); |
+ std::map<const RegionData*, SuggestionRegion*>::iterator unique_it = |
+ uniques.find(lookup->region); |
+ if (unique_it == uniques.end()) |
+ unique_it = uniques.insert(std::make_pair(lookup->region, lookup)).first; |
+ SuggestionRegion* target = unique_it->second; |
+ target->region_fields = |
+ AddBitsets(target->region_fields, lookup->region_fields); |
+ } |
+ |
+ std::vector<SuggestionRegion> result; |
+ for (std::map<const RegionData*, SuggestionRegion*>::const_iterator |
+ unique_it = uniques.begin(); |
+ unique_it != uniques.end(); ++unique_it) { |
+ result.push_back(*unique_it->second); |
+ } |
+ regions->swap(result); |
+} |
+ |
+} // namespace |
+ |
+// Builds and caches tries from region data. |
+class InputSuggester::TrieCollection { |
+ public: |
+ TrieCollection(PreloadSupplier* supplier) |
+ : region_data_builder_(supplier) { |
+ UErrorCode error_code = U_ZERO_ERROR; |
+ collator_.reset( |
+ icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); |
+ DCHECK(U_SUCCESS(error_code)); |
+ collator_->setStrength(icu::Collator::PRIMARY); |
+ } |
+ |
+ ~TrieCollection() { |
+ // Delete the maps and Trie objects owned by |tries_| field. |
+ for (RegionMap::const_iterator region_it = tries_.begin(); |
+ region_it != tries_.end(); ++region_it) { |
+ FieldMap* field_map = region_it->second; |
+ DCHECK(field_map); |
+ |
+ for (FieldMap::const_iterator field_it = field_map->begin(); |
+ field_it != field_map->end(); ++field_it) { |
+ IdMap* id_map = field_it->second; |
+ DCHECK(id_map); |
+ |
+ for (IdMap::const_iterator id_it = id_map->begin(); |
+ id_it != id_map->end(); ++id_it) { |
+ RegionDataTrie* trie = id_it->second; |
+ delete trie; |
+ } |
+ |
+ delete id_map; |
+ } |
+ |
+ delete field_map; |
+ } |
+ } |
+ |
+ // Builds a list of regions that can be suggested as completions for |
+ // |user_input| when the user is typing into |focused_field|. |
+ std::vector<SuggestionRegion> BuildSuggestionRegions( |
+ const AddressData& user_input, |
+ AddressField focused_field) { |
+ std::vector<SuggestionRegion> result; |
+ |
+ // Lazily initialize the mapping from fields to Trie objects. |
+ std::string unused_best_language; |
+ const RegionData& region_data = region_data_builder_.Build( |
+ user_input.region_code, user_input.language_code, |
+ &unused_best_language); |
+ RegionMap::iterator tries_it = tries_.find(®ion_data); |
+ if (tries_it == tries_.end()) { |
+ tries_it = tries_.insert( |
+ std::make_pair(®ion_data, new FieldMap)).first; |
+ if (!region_data.sub_regions().empty()) |
+ AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second); |
+ } |
+ const FieldMap& field_map = *(tries_it->second); |
+ |
+ // Do not suggest anything if there's no suggestion data for any fields. |
+ if (field_map.empty()) |
+ return result; |
+ |
+ // Do not suggest anything if there's no suggestion data for the focused |
+ // field. |
+ if (focused_field != POSTAL_CODE && |
+ field_map.find(focused_field) == field_map.end()) { |
+ return result; |
+ } |
+ |
+ // Calculate the most specific field with suggestion data. |
+ AddressField deepest_field_with_data = DEPENDENT_LOCALITY; |
+ while (field_map.find(deepest_field_with_data) == field_map.end()) { |
+ deepest_field_with_data = |
+ static_cast<AddressField>(deepest_field_with_data - 1); |
+ } |
+ |
+ // Determine the most specific address field that can be suggested. |
+ AddressField most_specific_field = focused_field != POSTAL_CODE |
+ ? focused_field : DEPENDENT_LOCALITY; |
+ if (most_specific_field > deepest_field_with_data) { |
+ most_specific_field = deepest_field_with_data; |
+ } |
+ if (focused_field != POSTAL_CODE) { |
+ while (user_input.GetFieldValue(most_specific_field).empty() && |
+ most_specific_field > ADMIN_AREA) { |
+ most_specific_field = |
+ static_cast<AddressField>(most_specific_field - 1); |
+ } |
+ } |
+ |
+ // Canonicalized user input data for lookup in the tries. |
+ AddressData canonical_input = user_input; |
+ for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { |
+ AddressField address_field = static_cast<AddressField>(i); |
+ canonical_input.SetFieldValue( |
+ address_field, |
+ CanonicalizeString(user_input.GetFieldValue(address_field))); |
+ } |
+ |
+ AddressField suggestion_field = ADMIN_AREA; |
+ |
+ typedef std::map<RegionField, RegionContainer*> IdRegionContainer; |
+ typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer; |
+ FieldRegionContainer matching_regions; |
+ |
+ IdRegionContainer* parent_id_regions = NULL; |
+ for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { |
+ AddressField address_field = static_cast<AddressField>(i); |
+ |
+ IdRegionContainer* id_regions = new IdRegionContainer; |
+ matching_regions.insert(std::make_pair(address_field, id_regions)); |
+ |
+ FieldMap::const_iterator field_map_it = field_map.find(address_field); |
+ DCHECK(field_map_it != field_map.end()); |
+ const IdMap* id_map = field_map_it->second; |
+ DCHECK(id_map); |
+ |
+ for (IdMap::const_iterator id_map_it = id_map->begin(); |
+ id_map_it != id_map->end(); ++id_map_it) { |
+ RegionField id = id_map_it->first; |
+ |
+ RegionContainer* regions = new RegionContainer; |
+ id_regions->insert(std::make_pair(id, regions)); |
+ |
+ const RegionDataTrie* trie = id_map_it->second; |
+ DCHECK(trie); |
+ |
+ trie->FindDataForKeyPrefix( |
+ canonical_input.GetFieldValue(address_field), regions); |
+ |
+ // Filter out the regions whose parents do not match the user input. |
+ if (address_field > ADMIN_AREA) { |
+ DCHECK(parent_id_regions); |
+ |
+ IdRegionContainer::const_iterator parent_id_regions_it = |
+ parent_id_regions->find(id); |
+ DCHECK(parent_id_regions_it != parent_id_regions->end()); |
+ |
+ RegionContainer* parent_regions = parent_id_regions_it->second; |
+ DCHECK(parent_regions); |
+ |
+ RegionContainer regions_with_parents; |
+ std::for_each(regions->begin(), regions->end(), |
+ ParentedRegionCollector( |
+ *parent_regions, ®ions_with_parents)); |
+ regions->swap(regions_with_parents); |
+ } |
+ |
+ for (RegionContainer::const_iterator region_it = regions->begin(); |
+ region_it != regions->end(); ++region_it) { |
+ if (address_field > suggestion_field) { |
+ suggestion_field = address_field; |
+ result.clear(); |
+ } |
+ result.push_back( |
+ SuggestionRegion(*region_it, BitsetFromBit(id), address_field)); |
+ } |
+ } |
+ |
+ parent_id_regions = id_regions; |
+ } |
+ |
+ // Cleanup. |
Evan Stade
2014/06/19 00:38:00
please do not try to handle bare pointers, use stl
please use gerrit instead
2014/06/20 21:09:15
Done.
|
+ for (FieldRegionContainer::const_iterator |
+ field_it = matching_regions.begin(); |
+ field_it != matching_regions.end(); ++field_it) { |
+ IdRegionContainer* id_regions = field_it->second; |
+ DCHECK(id_regions); |
+ for (IdRegionContainer::const_iterator id_it = id_regions->begin(); |
+ id_it != id_regions->end(); ++id_it) { |
+ RegionContainer* regions = id_it->second; |
+ delete regions; |
+ } |
+ delete id_regions; |
+ } |
+ |
+ RemoveDuplicateRegions(&result); |
+ return result; |
+ } |
+ |
+ private: |
+ typedef Trie<const RegionData*> RegionDataTrie; |
+ typedef std::map<RegionField, RegionDataTrie*> IdMap; |
Evan Stade
2014/06/19 00:38:00
serious? you need a map for this? RegionField only
please use gerrit instead
2014/06/20 21:09:15
Switched to structs and classes.
|
+ typedef std::map<AddressField, IdMap*> FieldMap; |
+ typedef std::map<const RegionData*, FieldMap*> RegionMap; |
+ |
+ // Recursively builds tries from subregions of |parent| and add these tries to |
+ // the |field_map|. |
+ void AddTriesForSubRegionsOf(const RegionData& parent, |
+ AddressField parent_field, |
+ FieldMap* field_map) { |
+ DCHECK(!parent.sub_regions().empty()); |
+ DCHECK(field_map); |
+ |
+ AddressField field = static_cast<AddressField>(parent_field + 1); |
+ DCHECK_GE(field, ADMIN_AREA); |
+ DCHECK_LE(field, DEPENDENT_LOCALITY); |
+ |
+ FieldMap::iterator field_it = field_map->find(field); |
+ if (field_it == field_map->end()) |
+ field_it = field_map->insert(std::make_pair(field, new IdMap)).first; |
+ |
+ IdMap* id_map = field_it->second; |
+ DCHECK(id_map); |
+ |
+ IdMap::iterator key_it = id_map->find(KEY); |
+ if (key_it == id_map->end()) |
+ key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first; |
+ RegionDataTrie* key_trie = key_it->second; |
+ DCHECK(key_trie); |
+ |
+ IdMap::iterator name_it = id_map->find(NAME); |
+ if (name_it == id_map->end()) |
+ name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first; |
+ RegionDataTrie* name_trie = name_it->second; |
+ DCHECK(name_trie); |
+ |
+ for (std::vector<const RegionData*>::const_iterator region_it = |
+ parent.sub_regions().begin(); |
+ region_it != parent.sub_regions().end(); ++region_it) { |
+ const RegionData* region = *region_it; |
+ DCHECK(region); |
+ |
+ key_trie->AddDataForKey(CanonicalizeString(region->key()), region); |
+ |
+ if (!region->name().empty()) { |
+ name_trie->AddDataForKey(CanonicalizeString(region->name()), region); |
+ } |
+ |
+ if (!region->sub_regions().empty()) |
+ AddTriesForSubRegionsOf(*region, field, field_map); |
+ } |
+ } |
+ |
+ // Returns a canonical version of the string that can be used for comparing |
+ // strings regardless of diacritics and capitalization. |
+ // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); |
+ // CanonicalizeString("Texas") == CanonicalizeString("teXas"); |
+ // CanonicalizeString("Texas") != CanonicalizeString("California"); |
+ // |
+ // The output is not human-readable. |
+ // CanonicalizeString("Texas") != "Texas"; |
+ std::string CanonicalizeString(const std::string& original) const { |
+ icu::UnicodeString icu_str( |
+ original.c_str(), static_cast<int32_t>(original.length())); |
+ int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); |
+ scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); |
+ DCHECK(buffer.get()); |
+ int32_t filled_size = |
+ collator_->getSortKey(icu_str, buffer.get(), buffer_size); |
+ DCHECK_EQ(buffer_size, filled_size); |
+ return std::string(reinterpret_cast<const char*>(buffer.get())); |
+ } |
+ |
+ // A mapping of the country-level RegionData objects to a collection of Trie |
+ // objects. Tries and maps are owned. RegionDatas are not owned. |
+ RegionMap tries_; |
+ |
+ // Data source for region data. |
+ RegionDataBuilder region_data_builder_; |
+ |
+ // Canonicalizes strings for case and diacritic insensitive search. |
+ scoped_ptr<icu::Collator> collator_; |
+ |
+ DISALLOW_COPY_AND_ASSIGN(TrieCollection); |
+}; |
+ |
+InputSuggester::InputSuggester(PreloadSupplier* supplier) |
+ : trie_collection_(new TrieCollection(supplier)) {} |
+ |
+InputSuggester::~InputSuggester() {} |
+ |
+void InputSuggester::GetSuggestions(const AddressData& user_input, |
+ AddressField focused_field, |
+ size_t suggestions_limit, |
+ std::vector<AddressData>* suggestions) { |
+ DCHECK(suggestions); |
+ DCHECK(focused_field == POSTAL_CODE || |
+ (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY)); |
+ |
+ // Do not suggest anything if the user input is empty. |
+ if (user_input.IsFieldEmpty(focused_field)) |
+ return; |
+ |
+ // Build the list of regions that match |user_input| when the user is typing |
+ // in the |focused_field|. |
+ const std::vector<SuggestionRegion>& regions = |
+ trie_collection_->BuildSuggestionRegions(user_input, focused_field); |
+ |
+ // Generate suggestions based on the regions. |
+ for (std::vector<SuggestionRegion>::const_iterator |
+ region_it = regions.begin(); |
+ region_it != regions.end(); ++region_it) { |
+ // Do not add more suggestions than |suggestions_limit|. |
+ if (suggestions->size() >= suggestions_limit) { |
+ suggestions->clear(); |
+ return; |
+ } |
+ |
+ AddressData suggestion; |
+ suggestion.region_code = user_input.region_code; |
+ suggestion.postal_code = user_input.postal_code; |
+ |
+ // Traverse the tree of regions from the most specific |region| to the |
+ // country-wide "root" of the tree. Use the region names found at each of |
+ // the levels of the ruleset tree to build the |suggestion|. |
+ DCHECK(region_it->region); |
+ AddressField address_field = region_it->address_field; |
+ for (const RegionData* suggestion_region = region_it->region; |
+ suggestion_region->has_parent(); |
+ suggestion_region = &suggestion_region->parent()) { |
+ suggestion.SetFieldValue( |
+ address_field, |
+ GetValueFromRegion(suggestion_region, region_it->region_fields)); |
+ address_field = static_cast<AddressField>(address_field - 1); |
+ } |
+ |
+ suggestions->push_back(suggestion); |
+ } |
+} |
+ |
+} // namespace autofill |