Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(490)

Unified Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Validate required fields without rules. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/libaddressinput/chromium/input_suggester.cc
diff --git a/third_party/libaddressinput/chromium/input_suggester.cc b/third_party/libaddressinput/chromium/input_suggester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1c1f4dfd4c365759335cda9fd33bddb279933f2c
--- /dev/null
+++ b/third_party/libaddressinput/chromium/input_suggester.cc
@@ -0,0 +1,484 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/libaddressinput/chromium/input_suggester.h"
+
+#include <map>
+#include <set>
+#include <utility>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "third_party/icu/source/common/unicode/errorcode.h"
+#include "third_party/icu/source/common/unicode/locid.h"
+#include "third_party/icu/source/common/unicode/unistr.h"
+#include "third_party/icu/source/common/unicode/utypes.h"
+#include "third_party/icu/source/i18n/unicode/coll.h"
+#include "third_party/libaddressinput/chromium/trie.h"
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
+#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data_builder.h"
+
+namespace autofill {
+
+using ::i18n::addressinput::AddressData;
+using ::i18n::addressinput::AddressField;
+using ::i18n::addressinput::ADMIN_AREA;
+using ::i18n::addressinput::COUNTRY;
+using ::i18n::addressinput::DEPENDENT_LOCALITY;
+using ::i18n::addressinput::POSTAL_CODE;
+using ::i18n::addressinput::PreloadSupplier;
+using ::i18n::addressinput::RegionData;
+using ::i18n::addressinput::RegionDataBuilder;
+
+namespace {
+
+// The types of fields that describe a region.
+enum RegionField {
+ KEY,
+ NAME,
+};
+
+// A bitset used for RegionField enum.
+typedef int Bitset;
+
+// A type to store a set of pointers to RegionData objects.
+typedef std::set<const RegionData*> RegionContainer;
+
+// Returns bitset with |bit| already set.
+Bitset BitsetFromBit(int bit) {
+ return 1 << bit;
+}
+
+// Returns a union of the bitests |a| and |b|.
+Bitset AddBitsets(Bitset a, Bitset b) {
+ return a | b;
+}
+
+// Returns true if |bit| is set in the |bitset|.
+bool TestBit(Bitset bitset, int bit) {
Evan Stade 2014/06/19 00:38:00 this is not the idiomatic way to deal with bitmask
please use gerrit instead 2014/06/20 21:09:15 Got rid of bitmaps all together.
+ return !!(bitset & (1 << bit));
+}
+
+// If |KEY| bit is set in the |value| bitset, then returns the key of the
+// region. If |NAME| bit is set in the |value| bitset, then returns the name of
+// the region. The |value| bitset should not be empty.
+const std::string& GetValueFromRegion(const RegionData* region, Bitset value) {
+ DCHECK(region);
+
+ if (TestBit(value, KEY))
+ return region->key();
+ else if (TestBit(value, NAME))
+ return region->name();
+ else
+ NOTREACHED();
+
+ return region->key();
+}
+
+// Collects regions based on whether they have a parent in the given list.
+class ParentedRegionCollector {
Evan Stade 2014/06/19 00:38:00 you created this entire class so you could use std
please use gerrit instead 2014/06/20 21:09:15 Removed and came up with a better implementation.
+ public:
+ // Retains a reference to both of the parameters. Does not make a copy of
+ // |parent_regions|. Does not take ownership of |regions_with_parents|. The
+ // |regions_with_parents| parameter should not be NULL.
+ ParentedRegionCollector(const RegionContainer& parent_regions,
+ RegionContainer* regions_with_parents)
+ : parent_regions_(parent_regions),
+ regions_with_parents_(regions_with_parents) {
+ DCHECK(regions_with_parents_);
+ }
+
+ ~ParentedRegionCollector() {}
+
+ // Adds |region_to_test| to the |regions_with_parents_| collection, if the
+ // given region has a parent in |parent_regions_|. The |region_to_test|
+ // parameter should not be NULL.
+ void operator()(const RegionData* region_to_test) {
+ DCHECK(region_to_test);
+ if (parent_regions_.find(&region_to_test->parent()) !=
+ parent_regions_.end()) {
+ regions_with_parents_->insert(region_to_test);
+ }
+ }
+
+ private:
+ const RegionContainer& parent_regions_;
+ RegionContainer* regions_with_parents_;
+};
+
+// A region and its metadata useful for constructing a suggestion.
+struct SuggestionRegion {
+ SuggestionRegion(const RegionData* region,
+ Bitset region_fields,
+ AddressField address_field)
+ : region(region),
+ region_fields(region_fields),
+ address_field(address_field) {}
+
+ ~SuggestionRegion() {}
+
+ // Not owned region data.
+ const RegionData* region;
+
+ // A bitset of region fields of the |region|.
+ Bitset region_fields;
+
+ // The address field of the |region|.
+ AddressField address_field;
+};
+
+// Removes duplicates based on RegionData and sums up the region field bitsets
+// of the duplicates.
+void RemoveDuplicateRegions(std::vector<SuggestionRegion>* regions) {
+ DCHECK(regions);
+
+ std::map<const RegionData*, SuggestionRegion*> uniques;
+ for (std::vector<SuggestionRegion>::iterator region_it = regions->begin();
+ region_it != regions->end(); ++region_it) {
+ SuggestionRegion* lookup = &(*region_it);
+ std::map<const RegionData*, SuggestionRegion*>::iterator unique_it =
+ uniques.find(lookup->region);
+ if (unique_it == uniques.end())
+ unique_it = uniques.insert(std::make_pair(lookup->region, lookup)).first;
+ SuggestionRegion* target = unique_it->second;
+ target->region_fields =
+ AddBitsets(target->region_fields, lookup->region_fields);
+ }
+
+ std::vector<SuggestionRegion> result;
+ for (std::map<const RegionData*, SuggestionRegion*>::const_iterator
+ unique_it = uniques.begin();
+ unique_it != uniques.end(); ++unique_it) {
+ result.push_back(*unique_it->second);
+ }
+ regions->swap(result);
+}
+
+} // namespace
+
+// Builds and caches tries from region data.
+class InputSuggester::TrieCollection {
+ public:
+ TrieCollection(PreloadSupplier* supplier)
+ : region_data_builder_(supplier) {
+ UErrorCode error_code = U_ZERO_ERROR;
+ collator_.reset(
+ icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
+ DCHECK(U_SUCCESS(error_code));
+ collator_->setStrength(icu::Collator::PRIMARY);
+ }
+
+ ~TrieCollection() {
+ // Delete the maps and Trie objects owned by |tries_| field.
+ for (RegionMap::const_iterator region_it = tries_.begin();
+ region_it != tries_.end(); ++region_it) {
+ FieldMap* field_map = region_it->second;
+ DCHECK(field_map);
+
+ for (FieldMap::const_iterator field_it = field_map->begin();
+ field_it != field_map->end(); ++field_it) {
+ IdMap* id_map = field_it->second;
+ DCHECK(id_map);
+
+ for (IdMap::const_iterator id_it = id_map->begin();
+ id_it != id_map->end(); ++id_it) {
+ RegionDataTrie* trie = id_it->second;
+ delete trie;
+ }
+
+ delete id_map;
+ }
+
+ delete field_map;
+ }
+ }
+
+ // Builds a list of regions that can be suggested as completions for
+ // |user_input| when the user is typing into |focused_field|.
+ std::vector<SuggestionRegion> BuildSuggestionRegions(
+ const AddressData& user_input,
+ AddressField focused_field) {
+ std::vector<SuggestionRegion> result;
+
+ // Lazily initialize the mapping from fields to Trie objects.
+ std::string unused_best_language;
+ const RegionData& region_data = region_data_builder_.Build(
+ user_input.region_code, user_input.language_code,
+ &unused_best_language);
+ RegionMap::iterator tries_it = tries_.find(&region_data);
+ if (tries_it == tries_.end()) {
+ tries_it = tries_.insert(
+ std::make_pair(&region_data, new FieldMap)).first;
+ if (!region_data.sub_regions().empty())
+ AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second);
+ }
+ const FieldMap& field_map = *(tries_it->second);
+
+ // Do not suggest anything if there's no suggestion data for any fields.
+ if (field_map.empty())
+ return result;
+
+ // Do not suggest anything if there's no suggestion data for the focused
+ // field.
+ if (focused_field != POSTAL_CODE &&
+ field_map.find(focused_field) == field_map.end()) {
+ return result;
+ }
+
+ // Calculate the most specific field with suggestion data.
+ AddressField deepest_field_with_data = DEPENDENT_LOCALITY;
+ while (field_map.find(deepest_field_with_data) == field_map.end()) {
+ deepest_field_with_data =
+ static_cast<AddressField>(deepest_field_with_data - 1);
+ }
+
+ // Determine the most specific address field that can be suggested.
+ AddressField most_specific_field = focused_field != POSTAL_CODE
+ ? focused_field : DEPENDENT_LOCALITY;
+ if (most_specific_field > deepest_field_with_data) {
+ most_specific_field = deepest_field_with_data;
+ }
+ if (focused_field != POSTAL_CODE) {
+ while (user_input.GetFieldValue(most_specific_field).empty() &&
+ most_specific_field > ADMIN_AREA) {
+ most_specific_field =
+ static_cast<AddressField>(most_specific_field - 1);
+ }
+ }
+
+ // Canonicalized user input data for lookup in the tries.
+ AddressData canonical_input = user_input;
+ for (int i = ADMIN_AREA; i <= most_specific_field; ++i) {
+ AddressField address_field = static_cast<AddressField>(i);
+ canonical_input.SetFieldValue(
+ address_field,
+ CanonicalizeString(user_input.GetFieldValue(address_field)));
+ }
+
+ AddressField suggestion_field = ADMIN_AREA;
+
+ typedef std::map<RegionField, RegionContainer*> IdRegionContainer;
+ typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer;
+ FieldRegionContainer matching_regions;
+
+ IdRegionContainer* parent_id_regions = NULL;
+ for (int i = ADMIN_AREA; i <= most_specific_field; ++i) {
+ AddressField address_field = static_cast<AddressField>(i);
+
+ IdRegionContainer* id_regions = new IdRegionContainer;
+ matching_regions.insert(std::make_pair(address_field, id_regions));
+
+ FieldMap::const_iterator field_map_it = field_map.find(address_field);
+ DCHECK(field_map_it != field_map.end());
+ const IdMap* id_map = field_map_it->second;
+ DCHECK(id_map);
+
+ for (IdMap::const_iterator id_map_it = id_map->begin();
+ id_map_it != id_map->end(); ++id_map_it) {
+ RegionField id = id_map_it->first;
+
+ RegionContainer* regions = new RegionContainer;
+ id_regions->insert(std::make_pair(id, regions));
+
+ const RegionDataTrie* trie = id_map_it->second;
+ DCHECK(trie);
+
+ trie->FindDataForKeyPrefix(
+ canonical_input.GetFieldValue(address_field), regions);
+
+ // Filter out the regions whose parents do not match the user input.
+ if (address_field > ADMIN_AREA) {
+ DCHECK(parent_id_regions);
+
+ IdRegionContainer::const_iterator parent_id_regions_it =
+ parent_id_regions->find(id);
+ DCHECK(parent_id_regions_it != parent_id_regions->end());
+
+ RegionContainer* parent_regions = parent_id_regions_it->second;
+ DCHECK(parent_regions);
+
+ RegionContainer regions_with_parents;
+ std::for_each(regions->begin(), regions->end(),
+ ParentedRegionCollector(
+ *parent_regions, &regions_with_parents));
+ regions->swap(regions_with_parents);
+ }
+
+ for (RegionContainer::const_iterator region_it = regions->begin();
+ region_it != regions->end(); ++region_it) {
+ if (address_field > suggestion_field) {
+ suggestion_field = address_field;
+ result.clear();
+ }
+ result.push_back(
+ SuggestionRegion(*region_it, BitsetFromBit(id), address_field));
+ }
+ }
+
+ parent_id_regions = id_regions;
+ }
+
+ // Cleanup.
Evan Stade 2014/06/19 00:38:00 please do not try to handle bare pointers, use stl
please use gerrit instead 2014/06/20 21:09:15 Done.
+ for (FieldRegionContainer::const_iterator
+ field_it = matching_regions.begin();
+ field_it != matching_regions.end(); ++field_it) {
+ IdRegionContainer* id_regions = field_it->second;
+ DCHECK(id_regions);
+ for (IdRegionContainer::const_iterator id_it = id_regions->begin();
+ id_it != id_regions->end(); ++id_it) {
+ RegionContainer* regions = id_it->second;
+ delete regions;
+ }
+ delete id_regions;
+ }
+
+ RemoveDuplicateRegions(&result);
+ return result;
+ }
+
+ private:
+ typedef Trie<const RegionData*> RegionDataTrie;
+ typedef std::map<RegionField, RegionDataTrie*> IdMap;
Evan Stade 2014/06/19 00:38:00 serious? you need a map for this? RegionField only
please use gerrit instead 2014/06/20 21:09:15 Switched to structs and classes.
+ typedef std::map<AddressField, IdMap*> FieldMap;
+ typedef std::map<const RegionData*, FieldMap*> RegionMap;
+
+ // Recursively builds tries from subregions of |parent| and add these tries to
+ // the |field_map|.
+ void AddTriesForSubRegionsOf(const RegionData& parent,
+ AddressField parent_field,
+ FieldMap* field_map) {
+ DCHECK(!parent.sub_regions().empty());
+ DCHECK(field_map);
+
+ AddressField field = static_cast<AddressField>(parent_field + 1);
+ DCHECK_GE(field, ADMIN_AREA);
+ DCHECK_LE(field, DEPENDENT_LOCALITY);
+
+ FieldMap::iterator field_it = field_map->find(field);
+ if (field_it == field_map->end())
+ field_it = field_map->insert(std::make_pair(field, new IdMap)).first;
+
+ IdMap* id_map = field_it->second;
+ DCHECK(id_map);
+
+ IdMap::iterator key_it = id_map->find(KEY);
+ if (key_it == id_map->end())
+ key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first;
+ RegionDataTrie* key_trie = key_it->second;
+ DCHECK(key_trie);
+
+ IdMap::iterator name_it = id_map->find(NAME);
+ if (name_it == id_map->end())
+ name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first;
+ RegionDataTrie* name_trie = name_it->second;
+ DCHECK(name_trie);
+
+ for (std::vector<const RegionData*>::const_iterator region_it =
+ parent.sub_regions().begin();
+ region_it != parent.sub_regions().end(); ++region_it) {
+ const RegionData* region = *region_it;
+ DCHECK(region);
+
+ key_trie->AddDataForKey(CanonicalizeString(region->key()), region);
+
+ if (!region->name().empty()) {
+ name_trie->AddDataForKey(CanonicalizeString(region->name()), region);
+ }
+
+ if (!region->sub_regions().empty())
+ AddTriesForSubRegionsOf(*region, field, field_map);
+ }
+ }
+
+ // Returns a canonical version of the string that can be used for comparing
+ // strings regardless of diacritics and capitalization.
+ // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
+ // CanonicalizeString("Texas") == CanonicalizeString("teXas");
+ // CanonicalizeString("Texas") != CanonicalizeString("California");
+ //
+ // The output is not human-readable.
+ // CanonicalizeString("Texas") != "Texas";
+ std::string CanonicalizeString(const std::string& original) const {
+ icu::UnicodeString icu_str(
+ original.c_str(), static_cast<int32_t>(original.length()));
+ int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
+ scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
+ DCHECK(buffer.get());
+ int32_t filled_size =
+ collator_->getSortKey(icu_str, buffer.get(), buffer_size);
+ DCHECK_EQ(buffer_size, filled_size);
+ return std::string(reinterpret_cast<const char*>(buffer.get()));
+ }
+
+ // A mapping of the country-level RegionData objects to a collection of Trie
+ // objects. Tries and maps are owned. RegionDatas are not owned.
+ RegionMap tries_;
+
+ // Data source for region data.
+ RegionDataBuilder region_data_builder_;
+
+ // Canonicalizes strings for case and diacritic insensitive search.
+ scoped_ptr<icu::Collator> collator_;
+
+ DISALLOW_COPY_AND_ASSIGN(TrieCollection);
+};
+
+InputSuggester::InputSuggester(PreloadSupplier* supplier)
+ : trie_collection_(new TrieCollection(supplier)) {}
+
+InputSuggester::~InputSuggester() {}
+
+void InputSuggester::GetSuggestions(const AddressData& user_input,
+ AddressField focused_field,
+ size_t suggestions_limit,
+ std::vector<AddressData>* suggestions) {
+ DCHECK(suggestions);
+ DCHECK(focused_field == POSTAL_CODE ||
+ (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
+
+ // Do not suggest anything if the user input is empty.
+ if (user_input.IsFieldEmpty(focused_field))
+ return;
+
+ // Build the list of regions that match |user_input| when the user is typing
+ // in the |focused_field|.
+ const std::vector<SuggestionRegion>& regions =
+ trie_collection_->BuildSuggestionRegions(user_input, focused_field);
+
+ // Generate suggestions based on the regions.
+ for (std::vector<SuggestionRegion>::const_iterator
+ region_it = regions.begin();
+ region_it != regions.end(); ++region_it) {
+ // Do not add more suggestions than |suggestions_limit|.
+ if (suggestions->size() >= suggestions_limit) {
+ suggestions->clear();
+ return;
+ }
+
+ AddressData suggestion;
+ suggestion.region_code = user_input.region_code;
+ suggestion.postal_code = user_input.postal_code;
+
+ // Traverse the tree of regions from the most specific |region| to the
+ // country-wide "root" of the tree. Use the region names found at each of
+ // the levels of the ruleset tree to build the |suggestion|.
+ DCHECK(region_it->region);
+ AddressField address_field = region_it->address_field;
+ for (const RegionData* suggestion_region = region_it->region;
+ suggestion_region->has_parent();
+ suggestion_region = &suggestion_region->parent()) {
+ suggestion.SetFieldValue(
+ address_field,
+ GetValueFromRegion(suggestion_region, region_it->region_fields));
+ address_field = static_cast<AddressField>(address_field - 1);
+ }
+
+ suggestions->push_back(suggestion);
+ }
+}
+
+} // namespace autofill

Powered by Google App Engine
This is Rietveld 408576698