| Index: third_party/libaddressinput/chromium/input_suggester.cc
|
| diff --git a/third_party/libaddressinput/chromium/input_suggester.cc b/third_party/libaddressinput/chromium/input_suggester.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..cf9f4d5eb8576a603a7f6642b9e44b17f98b025c
|
| --- /dev/null
|
| +++ b/third_party/libaddressinput/chromium/input_suggester.cc
|
| @@ -0,0 +1,499 @@
|
| +// Copyright 2014 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "third_party/libaddressinput/chromium/input_suggester.h"
|
| +
|
| +#include <map>
|
| +#include <set>
|
| +#include <utility>
|
| +
|
| +#include "base/basictypes.h"
|
| +#include "base/logging.h"
|
| +#include "base/memory/scoped_ptr.h"
|
| +#include "base/memory/scoped_vector.h"
|
| +#include "base/stl_util.h"
|
| +#include "third_party/icu/source/i18n/unicode/coll.h"
|
| +#include "third_party/libaddressinput/chromium/trie.h"
|
| +#include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
|
| +#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
|
| +#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data_builder.h"
|
| +
|
| +namespace autofill {
|
| +
|
| +using ::i18n::addressinput::AddressData;
|
| +using ::i18n::addressinput::AddressField;
|
| +using ::i18n::addressinput::PreloadSupplier;
|
| +using ::i18n::addressinput::RegionData;
|
| +using ::i18n::addressinput::RegionDataBuilder;
|
| +
|
| +using ::i18n::addressinput::ADMIN_AREA;
|
| +using ::i18n::addressinput::COUNTRY;
|
| +using ::i18n::addressinput::DEPENDENT_LOCALITY;
|
| +using ::i18n::addressinput::LOCALITY;
|
| +using ::i18n::addressinput::POSTAL_CODE;
|
| +
|
| +namespace {
|
| +
|
| +// A candidate region together with how it matched the user's input. Every
|
| +// member is const, so an instance cannot be modified or assigned to once it
|
| +// has been constructed.
|
| +struct Suggestion {
|
| +  // The region to offer to the user; not owned. For example, when
|
| +  // |region_to_suggest->name()| is "California", either "California" or "CA"
|
| +  // can be suggested.
|
| +  const RegionData* const region_to_suggest;
|
| +
|
| +  // The address field that the suggestion is for. For example, ADMIN_AREA in
|
| +  // the US means the suggestion is for the field labeled "State".
|
| +  const AddressField matching_address_field;
|
| +
|
| +  // True if the key of the region matches the user input and can be used in
|
| +  // the suggestion. For example, if this is true and
|
| +  // |region_to_suggest->key()| is "CA", then "CA" can be suggested.
|
| +  const bool region_key_matches;
|
| +
|
| +  // True if the name of the region matches the user input and can be used in
|
| +  // the suggestion. For example, if this is true and
|
| +  // |region_to_suggest->name()| is "California", then "California" can be
|
| +  // suggested.
|
| +  const bool region_name_matches;
|
| +
|
| +  // Creates a suggestion of |region_to_suggest|, which must not be NULL and
|
| +  // whose ownership is not taken. A valid suggestion needs at least one of
|
| +  // |region_key_matches| and |region_name_matches| to be true.
|
| +  Suggestion(const RegionData* region_to_suggest,
|
| +             AddressField matching_address_field,
|
| +             bool region_key_matches,
|
| +             bool region_name_matches)
|
| +      : region_to_suggest(region_to_suggest),
|
| +        matching_address_field(matching_address_field),
|
| +        region_key_matches(region_key_matches),
|
| +        region_name_matches(region_name_matches) {
|
| +    DCHECK(region_to_suggest);
|
| +    DCHECK(region_key_matches || region_name_matches);
|
| +  }
|
| +
|
| +  ~Suggestion() {}
|
| +};
|
| +
|
| +// Suggestions for an address. Contains a list of suggestions for each of the
|
| +// ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY fields of an address.
|
| +//
|
| +// The implementation relies on COUNTRY, ADMIN_AREA, LOCALITY, and
|
| +// DEPENDENT_LOCALITY having consecutive enum values in that order, because the
|
| +// parent level of a field is computed as |address_field - 1|.
|
| +class AddressSuggestions {
|
| + public:
|
| +  // Allocates an empty parent-region lookup for ADMIN_AREA and LOCALITY and an
|
| +  // empty suggestion list for ADMIN_AREA through DEPENDENT_LOCALITY.
|
| +  AddressSuggestions() {
|
| +    for (int i = ADMIN_AREA; i <= LOCALITY; ++i)
|
| +      parent_regions_[static_cast<AddressField>(i)] = new ParentRegions;
|
| +    for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
|
| +      suggestions_[static_cast<AddressField>(i)] = new ScopedVector<Suggestion>;
|
| +  }
|
| +
|
| +  // Frees the lookups and suggestion lists allocated by the constructor.
|
| +  ~AddressSuggestions() {
|
| +    STLDeleteValues(&parent_regions_);
|
| +    STLDeleteValues(&suggestions_);
|
| +  }
|
| +
|
| +  // Marks all regions at |address_field| level as matching user input.
|
| +  void AllRegionsMatchForField(AddressField address_field) {
|
| +    all_parent_regions_match_.insert(address_field);
|
| +  }
|
| +
|
| +  // Marks the given regions at |address_field| level as matching user input.
|
| +  // |regions_match_key| holds the regions that match user input by their keys
|
| +  // and |regions_match_name| holds the regions that match by their names.
|
| +  //
|
| +  // A region becomes a suggestion only if its parent region was previously
|
| +  // recorded as matching in the same way (by key for a key match, by name for
|
| +  // a name match), or if all regions at the parent level match.
|
| +  //
|
| +  // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
|
| +  // DEPENDENT_LOCALITY.
|
| +  //
|
| +  // Returns true if at least one suggestion was added.
|
| +  bool AddRegions(AddressField address_field,
|
| +                  const std::set<const RegionData*>& regions_match_key,
|
| +                  const std::set<const RegionData*>& regions_match_name) {
|
| +    DCHECK(address_field >= ADMIN_AREA);
|
| +    DCHECK(address_field <= DEPENDENT_LOCALITY);
|
| +
|
| +    AddressField parent_address_field =
|
| +        static_cast<AddressField>(address_field - 1);
|
| +
|
| +    bool all_parents_match =
|
| +        parent_address_field == COUNTRY ||
|
| +        all_parent_regions_match_.find(parent_address_field) !=
|
| +            all_parent_regions_match_.end();
|
| +
|
| +    // Cannot build |address_field| level suggestions if there are no matches
|
| +    // in |parent_address_field| level regions.
|
| +    const ParentRegions* parents = NULL;
|
| +    if (address_field > ADMIN_AREA && !all_parents_match) {
|
| +      parents = parent_regions_[parent_address_field];
|
| +      if (parents->keys.empty() && parents->names.empty())
|
| +        return false;
|
| +    }
|
| +
|
| +    // Matches are remembered for the next level down, except at the
|
| +    // DEPENDENT_LOCALITY level, which has no lookup of its own.
|
| +    ParentRegions* regions = NULL;
|
| +    if (address_field < DEPENDENT_LOCALITY)
|
| +      regions = parent_regions_[address_field];
|
| +
|
| +    ScopedVector<Suggestion>* suggestions = suggestions_[address_field];
|
| +    bool added_suggestions = false;
|
| +
|
| +    // Iterate over both |regions_match_key| and |regions_match_name|. Advance
|
| +    // either one iterator at a time (if they point to different data) or both
|
| +    // iterators at once (if they point to the same data).
|
| +    for (std::set<const RegionData*>::const_iterator
|
| +             key_it = regions_match_key.begin(),
|
| +             name_it = regions_match_name.begin();
|
| +         key_it != regions_match_key.end() ||
|
| +             name_it != regions_match_name.end();) {
|
| +      const RegionData* key_region =
|
| +          key_it != regions_match_key.end() ? *key_it : NULL;
|
| +      const RegionData* name_region =
|
| +          name_it != regions_match_name.end() ? *name_it : NULL;
|
| +
|
| +      // Regions that do not have a parent that also matches input will not
|
| +      // become a suggestion.
|
| +      bool key_region_has_parent =
|
| +          all_parents_match ||
|
| +          (parents && !parents->keys.empty() && key_region &&
|
| +           parents->keys.find(&key_region->parent()) != parents->keys.end());
|
| +      bool name_region_has_parent =
|
| +          all_parents_match ||
|
| +          (parents && !parents->names.empty() && name_region &&
|
| +           parents->names.find(&name_region->parent()) !=
|
| +               parents->names.end());
|
| +
|
| +      if (name_region && (!key_region || name_region < key_region)) {
|
| +        // This region matches by name only.
|
| +        if (name_region_has_parent) {
|
| +          suggestions->push_back(
|
| +              new Suggestion(name_region, address_field, false, true));
|
| +          added_suggestions = true;
|
| +          if (regions)
|
| +            regions->names.insert(name_region);
|
| +        }
|
| +
|
| +        ++name_it;
|
| +      } else if (key_region && (!name_region || key_region < name_region)) {
|
| +        // This region matches by key only.
|
| +        if (key_region_has_parent) {
|
| +          suggestions->push_back(
|
| +              new Suggestion(key_region, address_field, true, false));
|
| +          added_suggestions = true;
|
| +          if (regions)
|
| +            regions->keys.insert(key_region);
|
| +        }
|
| +
|
| +        ++key_it;
|
| +      } else {
|
| +        // The same region matches by both key and name.
|
| +        if (key_region_has_parent) {
|
| +          suggestions->push_back(
|
| +              new Suggestion(key_region, address_field, true, true));
|
| +          added_suggestions = true;
|
| +          if (regions) {
|
| +            regions->keys.insert(key_region);
|
| +            regions->names.insert(name_region);
|
| +          }
|
| +        } else if (name_region_has_parent) {
|
| +          // The parent matched by name but not by key, so the region can
|
| +          // still be suggested by name. Without this branch it would be
|
| +          // dropped even though a name-only match would have been accepted.
|
| +          suggestions->push_back(
|
| +              new Suggestion(name_region, address_field, false, true));
|
| +          added_suggestions = true;
|
| +          if (regions)
|
| +            regions->names.insert(name_region);
|
| +        }
|
| +
|
| +        ++key_it;
|
| +        ++name_it;
|
| +      }
|
| +    }
|
| +
|
| +    return added_suggestions;
|
| +  }
|
| +
|
| +  // Swaps the suggestions for the smallest sub-region into |suggestions|. The
|
| +  // levels are checked from DEPENDENT_LOCALITY up to ADMIN_AREA and the first
|
| +  // non-empty list is used; if every list is empty, |suggestions| is left
|
| +  // untouched. This object is not usable after this call due to using the
|
| +  // swap() operation.
|
| +  //
|
| +  // The |suggestions| parameter should not be NULL.
|
| +  void SwapSmallestSubRegionSuggestions(ScopedVector<Suggestion>* suggestions) {
|
| +    DCHECK(suggestions);
|
| +    for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
|
| +      ScopedVector<Suggestion>* result =
|
| +          suggestions_[static_cast<AddressField>(i)];
|
| +      if (!result->empty()) {
|
| +        result->swap(*suggestions);
|
| +        return;
|
| +      }
|
| +    }
|
| +  }
|
| +
|
| + private:
|
| +  // The sets of non-owned regions used for fast parent region lookup.
|
| +  struct ParentRegions {
|
| +    // Regions that match user input by key.
|
| +    std::set<const RegionData*> keys;
|
| +
|
| +    // Regions that match user input by name.
|
| +    std::set<const RegionData*> names;
|
| +  };
|
| +
|
| +  // The owned sets of non-owned regions for fast parent region lookup at
|
| +  // ADMIN_AREA and LOCALITY levels.
|
| +  std::map<AddressField, ParentRegions*> parent_regions_;
|
| +
|
| +  // The set of fields for which all sub-regions match user input. Used to avoid
|
| +  // storing a long list in |parent_regions_| and later looking it up there.
|
| +  std::set<AddressField> all_parent_regions_match_;
|
| +
|
| +  // The owned vectors of suggestions at ADMIN_AREA, LOCALITY, and
|
| +  // DEPENDENT_LOCALITY levels.
|
| +  std::map<AddressField, ScopedVector<Suggestion>*> suggestions_;
|
| +
|
| +  DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
|
| +};
|
| +
|
| +} // namespace
|
| +
|
| +// Canonicalizes strings for fast case and diacritic insensitive comparison.
|
| +class StringCanonicalizer {
|
| + public:
|
| +  // Initializes the canonicalizer. This is slow, so avoid calling it more often
|
| +  // than necessary.
|
| +  StringCanonicalizer() {
|
| +    UErrorCode error_code = U_ZERO_ERROR;
|
| +    collator_.reset(
|
| +        icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
|
| +    DCHECK(U_SUCCESS(error_code));
|
| +    // PRIMARY strength is what makes the resulting sort keys ignore the case
|
| +    // and diacritic differences described on Canonicalize().
|
| +    collator_->setStrength(icu::Collator::PRIMARY);
|
| +  }
|
| +
|
| +  ~StringCanonicalizer() {}
|
| +
|
| +  // Returns a canonical version of the string that can be used for comparing
|
| +  // strings regardless of diacritics and capitalization.
|
| +  //    Canonicalize("Texas") == Canonicalize("T\u00E9xas");
|
| +  //    Canonicalize("Texas") == Canonicalize("teXas");
|
| +  //    Canonicalize("Texas") != Canonicalize("California");
|
| +  //
|
| +  // The output is the collator's sort key and is not human-readable.
|
| +  //    Canonicalize("Texas") != "Texas";
|
| +  //
|
| +  // Returns an empty string if the collator does not produce a usable key.
|
| +  std::string Canonicalize(const std::string& original) const {
|
| +    icu::UnicodeString icu_str(original.c_str(),
|
| +                               static_cast<int32_t>(original.length()));
|
| +
|
| +    // Calling getSortKey() without a buffer reports the number of bytes needed
|
| +    // to store the key, including its terminating zero byte.
|
| +    int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
|
| +    if (buffer_size <= 0)
|
| +      return std::string();
|
| +
|
| +    scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
|
| +    int32_t filled_size =
|
| +        collator_->getSortKey(icu_str, buffer.get(), buffer_size);
|
| +    DCHECK_EQ(buffer_size, filled_size);
|
| +
|
| +    // Never read past the bytes that were actually written into |buffer|,
|
| +    // even in builds where the DCHECK above is compiled out.
|
| +    if (filled_size <= 0 || filled_size > buffer_size)
|
| +      return std::string();
|
| +
|
| +    // Build the string with an explicit length that excludes the terminating
|
| +    // zero byte, rather than scanning the buffer for it.
|
| +    return std::string(reinterpret_cast<const char*>(buffer.get()),
|
| +                       static_cast<size_t>(filled_size - 1));
|
| +  }
|
| +
|
| + private:
|
| +  // The collator that produces the sort keys used as canonical strings.
|
| +  scoped_ptr<icu::Collator> collator_;
|
| +
|
| +  DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer);
|
| +};
|
| +
|
| +// All sub-regions of a COUNTRY level region with metadata useful for
|
| +// constructing suggestions.
|
| +class SubRegionData {
|
| + public:
|
| +  // Indexes every sub-region below |country_region|, which must not have a
|
| +  // parent, by canonicalized key and name. |shared_canonicalizer| is held by
|
| +  // reference, so it has to outlive this object.
|
| +  SubRegionData(const RegionData& country_region,
|
| +                const StringCanonicalizer& shared_canonicalizer)
|
| +      // Listed in member declaration order, which is the order in which the
|
| +      // members are actually initialized.
|
| +      : smallest_region_size_(COUNTRY), canonicalizer_(shared_canonicalizer) {
|
| +    DCHECK(!country_region.has_parent());
|
| +
|
| +    for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
|
| +      field_data_[static_cast<AddressField>(i)] = new FieldData;
|
| +
|
| +    if (!country_region.sub_regions().empty())
|
| +      AddSubRegionsOf(country_region, COUNTRY);
|
| +  }
|
| +
|
| +  ~SubRegionData() { STLDeleteValues(&field_data_); }
|
| +
|
| +  // Fills |results| with the regions that match |user_input| when the user is
|
| +  // typing in |focused_field|. Leaves |results| untouched if there is no
|
| +  // suggestion data for the focused field or if nothing matches at the focused
|
| +  // level.
|
| +  void BuildSuggestions(const AddressData& user_input,
|
| +                        AddressField focused_field,
|
| +                        ScopedVector<Suggestion>* results) const {
|
| +    // Do not suggest anything if there's no suggestion data for the focused
|
| +    // field.
|
| +    if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
|
| +      return;
|
| +
|
| +    // Canonicalized user input data for lookup in the tries.
|
| +    AddressData canonical_input = user_input;
|
| +    for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i) {
|
| +      AddressField address_field = static_cast<AddressField>(i);
|
| +      const std::string& field_value = user_input.GetFieldValue(address_field);
|
| +      if (!field_value.empty()) {
|
| +        canonical_input.SetFieldValue(address_field,
|
| +                                      canonicalizer_.Canonicalize(field_value));
|
| +      }
|
| +    }
|
| +
|
| +    // Non-owned regions that match a field value in user input by region key or
|
| +    // name.
|
| +    std::set<const RegionData*> regions_match_key;
|
| +    std::set<const RegionData*> regions_match_name;
|
| +
|
| +    AddressSuggestions suggestions;
|
| +    for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
|
| +         ++i) {
|
| +      AddressField address_field = static_cast<AddressField>(i);
|
| +      AddressField parent_address_field = static_cast<AddressField>(i - 1);
|
| +
|
| +      const std::string& canonical_field_value =
|
| +          canonical_input.GetFieldValue(address_field);
|
| +
|
| +      // An empty field whose parent field is also empty (or which has no
|
| +      // parent field) places no constraint on the levels below it.
|
| +      if (canonical_field_value.empty() &&
|
| +          (address_field == ADMIN_AREA ||
|
| +           canonical_input.GetFieldValue(parent_address_field).empty())) {
|
| +        suggestions.AllRegionsMatchForField(address_field);
|
| +        continue;
|
| +      }
|
| +
|
| +      regions_match_key.clear();
|
| +      regions_match_name.clear();
|
| +
|
| +      const FieldData* field_data = field_data_.find(address_field)->second;
|
| +      field_data->keys.FindDataForKeyPrefix(canonical_field_value,
|
| +                                            &regions_match_key);
|
| +      field_data->names.FindDataForKeyPrefix(canonical_field_value,
|
| +                                             &regions_match_name);
|
| +
|
| +      bool added_suggestions = suggestions.AddRegions(
|
| +          address_field, regions_match_key, regions_match_name);
|
| +
|
| +      // Do not suggest anything if the focused field does not have suggestions.
|
| +      if (address_field == focused_field && !added_suggestions)
|
| +        return;
|
| +    }
|
| +
|
| +    suggestions.SwapSmallestSubRegionSuggestions(results);
|
| +  }
|
| +
|
| + private:
|
| +  // The tries to lookup regions for a specific field by keys and names. For
|
| +  // example, the FieldData for ADMIN_AREA in US will have keys for "AL", "AK",
|
| +  // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
|
| +  // struct is uncopyable due to Trie objects being uncopyable.
|
| +  struct FieldData {
|
| +    Trie<const RegionData*> keys;
|
| +    Trie<const RegionData*> names;
|
| +  };
|
| +
|
| +  // Adds every sub-region of |parent_region|, which is at |parent_field|
|
| +  // level, to the tries for the next level down, then recurses into each
|
| +  // sub-region that has sub-regions of its own. |parent_region| must have at
|
| +  // least one sub-region.
|
| +  void AddSubRegionsOf(const RegionData& parent_region,
|
| +                       AddressField parent_field) {
|
| +    DCHECK(!parent_region.sub_regions().empty());
|
| +
|
| +    AddressField field = static_cast<AddressField>(parent_field + 1);
|
| +    DCHECK(field >= ADMIN_AREA);
|
| +    DCHECK(field <= DEPENDENT_LOCALITY);
|
| +
|
| +    FieldData* field_data = field_data_[field];
|
| +    DCHECK(field_data);
|
| +
|
| +    for (std::vector<const RegionData*>::const_iterator it =
|
| +             parent_region.sub_regions().begin();
|
| +         it != parent_region.sub_regions().end();
|
| +         ++it) {
|
| +      const RegionData* region = *it;
|
| +      DCHECK(region);
|
| +
|
| +      field_data->keys.AddDataForKey(canonicalizer_.Canonicalize(region->key()),
|
| +                                     region);
|
| +      field_data->names.AddDataForKey(
|
| +          canonicalizer_.Canonicalize(region->name()), region);
|
| +
|
| +      // Track the deepest level that has any data.
|
| +      if (smallest_region_size_ < field)
|
| +        smallest_region_size_ = field;
|
| +
|
| +      if (!region->sub_regions().empty())
|
| +        AddSubRegionsOf(*region, field);
|
| +    }
|
| +  }
|
| +
|
| +  // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and
|
| +  // DEPENDENT_LOCALITY.
|
| +  std::map<AddressField, FieldData*> field_data_;
|
| +
|
| +  // The smallest size of a sub-region that has data. For example, this is
|
| +  // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
|
| +  AddressField smallest_region_size_;
|
| +
|
| +  // A shared instance of string canonicalizer for case and diacritic comparison
|
| +  // of region keys and names.
|
| +  const StringCanonicalizer& canonicalizer_;
|
| +
|
| +  DISALLOW_COPY_AND_ASSIGN(SubRegionData);
|
| +};
|
| +
|
| +// Initializes the region data builder with |supplier|.
|
| +InputSuggester::InputSuggester(PreloadSupplier* supplier)
|
| +    : region_data_builder_(supplier) {}
|
| +
|
| +// Frees the SubRegionData objects stored as values in |sub_regions_|.
|
| +InputSuggester::~InputSuggester() {
|
| +  STLDeleteValues(&sub_regions_);
|
| +}
|
| +
|
| +// Appends to |suggestions| one address for every region that matches
|
| +// |user_input| while the user is typing in |focused_field|. Each suggested
|
| +// address carries the region code and postal code of |user_input| plus the
|
| +// matched region and all of its parent regions.
|
| +//
|
| +// |focused_field| must be POSTAL_CODE, ADMIN_AREA, LOCALITY, or
|
| +// DEPENDENT_LOCALITY, and |suggestions| must not be NULL. Nothing is suggested
|
| +// if the focused field of |user_input| is empty. If adding a suggestion would
|
| +// exceed |suggestions_limit| entries, |suggestions| is cleared instead.
|
| +void InputSuggester::GetSuggestions(const AddressData& user_input,
|
| +                                    AddressField focused_field,
|
| +                                    size_t suggestions_limit,
|
| +                                    std::vector<AddressData>* suggestions) {
|
| +  DCHECK(suggestions);
|
| +  DCHECK(focused_field == POSTAL_CODE ||
|
| +         (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
|
| +
|
| +  // Do not suggest anything if the user input is empty.
|
| +  if (user_input.IsFieldEmpty(focused_field))
|
| +    return;
|
| +
|
| +  // Lazily initialize the mapping from fields to Trie objects.
|
| +  std::string unused_best_language;
|
| +  const RegionData& region_data = region_data_builder_.Build(
|
| +      user_input.region_code, user_input.language_code, &unused_best_language);
|
| +
|
| +  std::map<const RegionData*, const SubRegionData*>::iterator
|
| +      sub_region_data_it = sub_regions_.find(&region_data);
|
| +  if (sub_region_data_it == sub_regions_.end()) {
|
| +    // The canonicalizer is created on first use and then shared by every
|
| +    // SubRegionData.
|
| +    if (!canonicalizer_) {
|
| +      canonicalizer_.reset(new StringCanonicalizer);
|
| +    }
|
| +    sub_region_data_it =
|
| +        sub_regions_.insert(std::make_pair(&region_data,
|
| +                                           new SubRegionData(region_data,
|
| +                                                             *canonicalizer_)))
|
| +            .first;
|
| +  }
|
| +  DCHECK(sub_region_data_it->second);
|
| +
|
| +  // Build the list of regions that match |user_input| when the user is typing
|
| +  // in the |focused_field|.
|
| +  ScopedVector<Suggestion> suggested_regions;
|
| +  sub_region_data_it->second->BuildSuggestions(
|
| +      user_input, focused_field, &suggested_regions);
|
| +
|
| +  // Generate suggestions based on the regions.
|
| +  for (ScopedVector<Suggestion>::const_iterator suggestion_it =
|
| +           suggested_regions.begin();
|
| +       suggestion_it != suggested_regions.end();
|
| +       ++suggestion_it) {
|
| +    const Suggestion* suggested_region = *suggestion_it;
|
| +
|
| +    // Do not add more suggestions than |suggestions_limit|.
|
| +    if (suggestions->size() >= suggestions_limit) {
|
| +      suggestions->clear();
|
| +      return;
|
| +    }
|
| +
|
| +    AddressData address;
|
| +    address.region_code = user_input.region_code;
|
| +    address.postal_code = user_input.postal_code;
|
| +
|
| +    // Traverse the tree of regions from the most specific |region| to the
|
| +    // country-wide "root" of the tree. Use the region keys or names found at
|
| +    // each of the levels of the tree to build the suggested |address|.
|
| +    AddressField address_field = suggested_region->matching_address_field;
|
| +    for (const RegionData* region = suggested_region->region_to_suggest;
|
| +         region->has_parent();
|
| +         region = &region->parent()) {
|
| +      address.SetFieldValue(address_field,
|
| +                            suggested_region->region_key_matches
|
| +                                ? region->key()
|
| +                                : region->name());
|
| +      address_field = static_cast<AddressField>(address_field - 1);
|
| +    }
|
| +
|
| +    suggestions->push_back(address);
|
| +  }
|
| +}
|
| +
|
| +} // namespace autofill
|
|
|