Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Work in progress. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/scoped_vector.h"
15 #include "base/stl_util.h"
16 #include "third_party/icu/source/i18n/unicode/coll.h"
17 #include "third_party/libaddressinput/chromium/trie.h"
18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da ta.h"
19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a.h"
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a_builder.h"
21
22 namespace autofill {
23
24 using ::i18n::addressinput::AddressData;
25 using ::i18n::addressinput::AddressField;
26 using ::i18n::addressinput::PreloadSupplier;
27 using ::i18n::addressinput::RegionData;
28 using ::i18n::addressinput::RegionDataBuilder;
29
30 using ::i18n::addressinput::ADMIN_AREA;
31 using ::i18n::addressinput::COUNTRY;
32 using ::i18n::addressinput::DEPENDENT_LOCALITY;
33 using ::i18n::addressinput::LOCALITY;
34 using ::i18n::addressinput::POSTAL_CODE;
35
36 namespace {
37
38 // A region and its metadata useful for constructing a suggestion. The object is
39 // immutable and uncopyable.
40 struct Suggestion {
41 public:
42 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
43 // |region_to_suggest|, which should not be NULL. At least one of
44 // |region_key_matches| and |region_name_names| should be true, otherwise it's
45 // not a valid suggestion.
46 Suggestion(const RegionData* region_to_suggest,
47 AddressField matching_address_field,
48 bool region_key_matches,
49 bool region_name_matches)
50 : region_to_suggest(region_to_suggest),
51 matching_address_field(matching_address_field),
52 region_key_matches(region_key_matches),
53 region_name_matches(region_name_matches) {
54 DCHECK(region_to_suggest);
55 DCHECK(region_key_matches || region_name_matches);
56 }
57
58 ~Suggestion() {}
59
60 // The region that should be suggested. For example, if
61 // |region_to_suggest->name()| is "California", then "California" or "CA"
62 // can be suggested.
63 const RegionData* const region_to_suggest;
64
65 // The field in the address for which the suggestion should be made. For
66 // example, ADMIN_AREA in US means the suggestion should be made for the field
67 // labeled "State".
68 const AddressField matching_address_field;
69
70 // True if the the key of the region matches user input and can be used in
71 // suggestion. For example, if this is true and |region_to_suggest->key()| is
72 // "CA", then "CA" cab be suggested.
73 const bool region_key_matches;
74
75 // True if the name of the region matches user input and can be used in
76 // suggestion. For example, if this is true and |region_to_suggest->name()| is
77 // "California", then "California" can be suggested.
78 const bool region_name_matches;
79 };
80
81 // Suggestions for an address. Contains lists of suggestions for every field in
82 // an address.
83 class AddressSuggestions {
84 public:
85 AddressSuggestions() {
86 for (int i = ADMIN_AREA; i <= LOCALITY; ++i)
87 parent_regions_[static_cast<AddressField>(i)] = new ParentRegions;
88 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
89 suggestions_[static_cast<AddressField>(i)] = new ScopedVector<Suggestion>;
90 }
91
92 ~AddressSuggestions() {
93 STLDeleteValues(&parent_regions_);
94 STLDeleteValues(&suggestions_);
95 }
96
97 // Marks all regions at |address_field| level as matching user input.
98 void AllRegionsMatchForField(AddressField address_field) {
99 all_parent_regions_match_.insert(address_field);
100 }
101
102 // Marks given regions at |address_field| level as matching user input. The
103 // |regions_match_key| parameter contains the regions that match user input by
104 // their keys. The |regions_match_name| parameter---by their names.
105 //
106 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
107 // DEPENDENT_LOCALITY.
108 bool AddRegions(AddressField address_field,
109 const std::set<const RegionData*>& regions_match_key,
110 const std::set<const RegionData*>& regions_match_name) {
111 DCHECK(address_field >= ADMIN_AREA);
112 DCHECK(address_field <= DEPENDENT_LOCALITY);
113
114 AddressField parent_address_field =
115 static_cast<AddressField>(address_field - 1);
116
117 bool all_parents_match =
118 parent_address_field == COUNTRY ||
119 all_parent_regions_match_.find(parent_address_field) !=
120 all_parent_regions_match_.end();
121
122 // Cannot build |address_field| level suggestions if none of there are no
123 // matches in |parent_address_field| level regions.
124 const ParentRegions* parents = NULL;
125 if (address_field > ADMIN_AREA && !all_parents_match) {
126 parents = parent_regions_[parent_address_field];
127 if (parents->keys.empty() && parents->names.empty())
128 return false;
129 }
130
131 ParentRegions* regions = NULL;
132 if (address_field < DEPENDENT_LOCALITY)
133 regions = parent_regions_[address_field];
134
135 ScopedVector<Suggestion>* suggestions = suggestions_[address_field];
136 bool added_suggestions = false;
137
138 // Iterate over both |regions_match_key| and |regions_match_name|. Advance
139 // either one iterator at a time (if they point to different data) or both
140 // iterators at once (if they point to the same data).
141 for (std::set<const RegionData*>::const_iterator
142 key_it = regions_match_key.begin(),
143 name_it = regions_match_name.begin();
144 key_it != regions_match_key.end() ||
145 name_it != regions_match_name.end();) {
146 const RegionData* key_region =
147 key_it != regions_match_key.end() ? *key_it : NULL;
148 const RegionData* name_region =
149 name_it != regions_match_name.end() ? *name_it : NULL;
150
151 // Regions that do not have a parent that also matches input will not
152 // become a suggestion.
153 bool key_region_has_parent =
154 all_parents_match ||
155 (parents && !parents->keys.empty() && key_region &&
156 parents->keys.find(&key_region->parent()) != parents->keys.end());
157 bool name_region_has_parent =
158 all_parents_match ||
159 (parents && !parents->names.empty() && name_region &&
160 parents->names.find(&name_region->parent()) != parents->names.end());
161
162 if (name_region && (!key_region || name_region < key_region)) {
163 if (name_region_has_parent) {
164 suggestions->push_back(
165 new Suggestion(name_region, address_field, false, true));
166 added_suggestions = true;
167 if (regions)
168 regions->names.insert(name_region);
169 }
170
171 ++name_it;
172 } else if (key_region && (!name_region || key_region < name_region)) {
173 if (key_region_has_parent) {
174 suggestions->push_back(
175 new Suggestion(key_region, address_field, true, false));
176 added_suggestions = true;
177 if (regions)
178 regions->keys.insert(key_region);
179 }
180
181 ++key_it;
182 } else {
183 if (key_region_has_parent) {
184 suggestions->push_back(
185 new Suggestion(key_region, address_field, true, true));
186 added_suggestions = true;
187 if (regions) {
188 regions->keys.insert(key_region);
189 regions->names.insert(name_region);
190 }
191 }
192
193 ++key_it;
194 ++name_it;
195 }
196 }
197
198 return added_suggestions;
199 }
200
201 // Swaps the suggestions for the smallest sub-region into |suggestions|. This
202 // object is not usable after this call due to using the swap() operation.
203 //
204 // The |suggestions| parameter should not be NULL.
205 void SwapSmallestSubRegionSuggestions(ScopedVector<Suggestion>* suggestions) {
206 DCHECK(suggestions);
207 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
208 ScopedVector<Suggestion>* result =
209 suggestions_[static_cast<AddressField>(i)];
210 if (!result->empty()) {
211 result->swap(*suggestions);
212 return;
213 }
214 }
215 }
216
217 private:
218 // The sets of non-owned regions used for fast parent region lookup.
219 struct ParentRegions {
220 // Regions that match user input by key.
221 std::set<const RegionData*> keys;
222
223 // Regions that match user input by name.
224 std::set<const RegionData*> names;
225 };
226
227 // The owned sets of non-owned regions for past parent region lookup at
228 // ADMIN_AREA and LOCALITY levels.
229 std::map<AddressField, ParentRegions*> parent_regions_;
230
231 // The set of fields for which all sub-regions match user input. Used to avoid
232 // storing a long list in |parent_regions_| and later looking it up there.
233 std::set<AddressField> all_parent_regions_match_;
234
235 // The owned vectors of suggestions at ADMIN_AREA, LOCALITY, and
236 // DEPENDENT_LOCALITY levels.
237 std::map<AddressField, ScopedVector<Suggestion>*> suggestions_;
238
239 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
240 };
241
242 } // namespace
243
244 // Canonicalizes strings for fast case and diacritic insensitive comparison.
245 class StringCanonicalizer {
246 public:
247 // Initializes the canonicalizer. This is slow, so avoid calling it more often
248 // than necessary.
249 StringCanonicalizer() {
250 UErrorCode error_code = U_ZERO_ERROR;
251 collator_.reset(
252 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
253 DCHECK(U_SUCCESS(error_code));
254 collator_->setStrength(icu::Collator::PRIMARY);
255 }
256
257 ~StringCanonicalizer() {}
258
259 // Returns a canonical version of the string that can be used for comparing
260 // strings regardless of diacritics and capitalization.
261 // Canonicalize("Texas") == Canonicalize("T\u00E9xas");
262 // Canonicalize("Texas") == Canonicalize("teXas");
263 // Canonicalize("Texas") != Canonicalize("California");
264 //
265 // The output is not human-readable.
266 // Canonicalize("Texas") != "Texas";
267 std::string Canonicalize(const std::string& original) const {
268 icu::UnicodeString icu_str(original.c_str(),
269 static_cast<int32_t>(original.length()));
270 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
271 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
272 DCHECK(buffer.get());
273 int32_t filled_size =
274 collator_->getSortKey(icu_str, buffer.get(), buffer_size);
275 DCHECK_EQ(buffer_size, filled_size);
276 return std::string(reinterpret_cast<const char*>(buffer.get()));
277 }
278
279 private:
280 scoped_ptr<icu::Collator> collator_;
281
282 DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer);
283 };
284
285 // All sub-regions of a COUNTRY level region with metadata useful for
286 // constructing suggestions.
287 class SubRegionData {
288 public:
289 SubRegionData(const RegionData& country_region,
290 const StringCanonicalizer& shared_canonicalizer)
291 : canonicalizer_(shared_canonicalizer), smallest_region_size_(COUNTRY) {
292 DCHECK(!country_region.has_parent());
293
294 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
295 field_data_[static_cast<AddressField>(i)] = new FieldData;
296
297 if (!country_region.sub_regions().empty())
298 AddSubRegionsOf(country_region, COUNTRY);
299 }
300
301 ~SubRegionData() { STLDeleteValues(&field_data_); }
302
303 void BuildSuggestions(const AddressData& user_input,
304 AddressField focused_field,
305 ScopedVector<Suggestion>* results) const {
306 // Do not suggest anything if there's no suggestion data for the focused
307 // field.
308 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
309 return;
310
311 // Canonicalized user input data for lookup in the tries.
312 AddressData canonical_input = user_input;
313 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i) {
314 AddressField address_field = static_cast<AddressField>(i);
315 const std::string& field_value = user_input.GetFieldValue(address_field);
316 if (!field_value.empty()) {
317 canonical_input.SetFieldValue(address_field,
318 canonicalizer_.Canonicalize(field_value));
319 }
320 }
321
322 // Non-owned regions that match a field value in user input by region key or
323 // name.
324 std::set<const RegionData*> regions_match_key;
325 std::set<const RegionData*> regions_match_name;
326
327 AddressSuggestions suggestions;
328 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
329 ++i) {
330 AddressField address_field = static_cast<AddressField>(i);
331 AddressField parent_address_field = static_cast<AddressField>(i - 1);
332
333 const std::string& canonical_field_value =
334 canonical_input.GetFieldValue(address_field);
335
336 if (canonical_field_value.empty() &&
337 (address_field == ADMIN_AREA ||
338 canonical_input.GetFieldValue(parent_address_field).empty())) {
339 suggestions.AllRegionsMatchForField(address_field);
340 continue;
341 }
342
343 regions_match_key.clear();
344 regions_match_name.clear();
345
346 const FieldData* field_data = field_data_.find(address_field)->second;
347 field_data->keys.FindDataForKeyPrefix(canonical_field_value,
348 &regions_match_key);
349 field_data->names.FindDataForKeyPrefix(canonical_field_value,
350 &regions_match_name);
351
352 bool added_suggestions = suggestions.AddRegions(
353 address_field, regions_match_key, regions_match_name);
354
355 // Do not suggest anything if the focused field does not have suggestions.
356 if (address_field == focused_field && !added_suggestions)
357 return;
358 }
359
360 suggestions.SwapSmallestSubRegionSuggestions(results);
361 }
362
363 private:
364 // The tries to lookup regions for a specific field by keys and names. For
365 // example, the FieldData for ADMIN_AREA in US will have keys for "AL", "AK",
366 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
367 // struct is uncopyable due to Trie objects being uncopyable.
368 struct FieldData {
369 Trie<const RegionData*> keys;
370 Trie<const RegionData*> names;
371 };
372
373 void AddSubRegionsOf(const RegionData& parent_region,
374 AddressField parent_field) {
375 DCHECK(!parent_region.sub_regions().empty());
376
377 AddressField field = static_cast<AddressField>(parent_field + 1);
378 DCHECK(field >= ADMIN_AREA);
379 DCHECK(field <= DEPENDENT_LOCALITY);
380
381 FieldData* field_data = field_data_[field];
382 DCHECK(field_data);
383
384 for (std::vector<const RegionData*>::const_iterator it =
385 parent_region.sub_regions().begin();
386 it != parent_region.sub_regions().end();
387 ++it) {
388 const RegionData* region = *it;
389 DCHECK(region);
390
391 field_data->keys.AddDataForKey(canonicalizer_.Canonicalize(region->key()),
392 region);
393 field_data->names.AddDataForKey(
394 canonicalizer_.Canonicalize(region->name()), region);
395
396 if (smallest_region_size_ < field)
397 smallest_region_size_ = field;
398
399 if (!region->sub_regions().empty())
400 AddSubRegionsOf(*region, field);
401 }
402 }
403
404 // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and
405 // DEPENDENT_LOCALITY.
406 std::map<AddressField, FieldData*> field_data_;
407
408 // The smallest size of a sub-region that has data. For example, this is
409 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
410 AddressField smallest_region_size_;
411
412 // A shared instance of string canonicalizer for case and diacritic comparison
413 // of region keys and names.
414 const StringCanonicalizer& canonicalizer_;
415
416 DISALLOW_COPY_AND_ASSIGN(SubRegionData);
417 };
418
419 InputSuggester::InputSuggester(PreloadSupplier* supplier)
420 : region_data_builder_(supplier) {
421 }
422
423 InputSuggester::~InputSuggester() {
424 STLDeleteValues(&sub_regions_);
425 }
426
427 void InputSuggester::GetSuggestions(const AddressData& user_input,
428 AddressField focused_field,
429 size_t suggestions_limit,
430 std::vector<AddressData>* suggestions) {
431 DCHECK(suggestions);
432 DCHECK(focused_field == POSTAL_CODE ||
433 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
434
435 // Do not suggest anything if the user input is empty.
436 if (user_input.IsFieldEmpty(focused_field))
437 return;
438
439 // Lazily initialize the mapping from fields to Trie objects.
440 std::string unused_best_language;
441 const RegionData& region_data = region_data_builder_.Build(
442 user_input.region_code, user_input.language_code, &unused_best_language);
443
444 std::map<const RegionData*, const SubRegionData*>::iterator
445 sub_region_data_it = sub_regions_.find(&region_data);
446 if (sub_region_data_it == sub_regions_.end()) {
447 if (!canonicalizer_) {
448 canonicalizer_.reset(new StringCanonicalizer);
449 }
450 sub_region_data_it =
451 sub_regions_.insert(std::make_pair(&region_data,
452 new SubRegionData(region_data,
453 *canonicalizer_)))
454 .first;
455 }
456 DCHECK(sub_region_data_it->second);
457
458 // Build the list of regions that match |user_input| when the user is typing
459 // in the |focused_field|.
460 ScopedVector<Suggestion> suggested_regions;
461 sub_region_data_it->second->BuildSuggestions(
462 user_input, focused_field, &suggested_regions);
463
464 // Generate suggestions based on the regions.
465 for (ScopedVector<Suggestion>::const_iterator suggestion_it =
466 suggested_regions.begin();
467 suggestion_it != suggested_regions.end();
468 ++suggestion_it) {
469 Suggestion* suggested_region = *suggestion_it;
470
471 // Do not add more suggestions than |suggestions_limit|.
472 if (suggestions->size() >= suggestions_limit) {
473 suggestions->clear();
474 return;
475 }
476
477 AddressData address;
478 address.region_code = user_input.region_code;
479 address.postal_code = user_input.postal_code;
480
481 // Traverse the tree of regions from the most specific |region| to the
482 // country-wide "root" of the tree. Use the region names found at each of
483 // the levels of the ruleset tree to build the |suggestion|.
484 AddressField address_field = suggested_region->matching_address_field;
485 for (const RegionData* region = suggested_region->region_to_suggest;
486 region->has_parent();
487 region = &region->parent()) {
488 address.SetFieldValue(address_field,
489 suggested_region->region_key_matches
490 ? region->key()
491 : region->name());
492 address_field = static_cast<AddressField>(address_field - 1);
493 }
494
495 suggestions->push_back(address);
496 }
497 }
498
499 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698