Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(296)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix AutofillProfileSyncableServiceTest. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/scoped_vector.h"
15 #include "base/stl_util.h"
16 #include "third_party/icu/source/i18n/unicode/coll.h"
17 #include "third_party/libaddressinput/chromium/trie.h"
18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da ta.h"
19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h "
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su pplier.h"
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a.h"
22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a_builder.h"
23
24 namespace autofill {
25
26 using ::i18n::addressinput::AddressData;
27 using ::i18n::addressinput::AddressField;
28 using ::i18n::addressinput::BuildCallback;
29 using ::i18n::addressinput::FieldProblemMap;
30 using ::i18n::addressinput::PreloadSupplier;
31 using ::i18n::addressinput::RegionData;
32 using ::i18n::addressinput::RegionDataBuilder;
33
34 using ::i18n::addressinput::ADMIN_AREA;
35 using ::i18n::addressinput::COUNTRY;
36 using ::i18n::addressinput::DEPENDENT_LOCALITY;
37 using ::i18n::addressinput::LOCALITY;
38 using ::i18n::addressinput::POSTAL_CODE;
39
40 using ::i18n::addressinput::INVALID_FORMAT;
41 using ::i18n::addressinput::MISMATCHING_VALUE;
42
43 namespace {
44
45 // A region and its metadata useful for constructing a suggestion.
46 struct Suggestion {
47 public:
48 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
49 // |region_to_suggest|, which should not be NULL.
50 Suggestion(const RegionData* region_to_suggest,
51 AddressField matching_address_field,
52 bool region_key_matches)
53 : region_to_suggest(region_to_suggest),
54 matching_address_field(matching_address_field),
55 region_key_matches(region_key_matches) {
56 DCHECK(region_to_suggest);
57 }
58
59 ~Suggestion() {}
60
61 // The region that should be suggested. For example, if the region is ("CA",
62 // "California"), then either "CA" or "California" should be suggested.
63 const RegionData* region_to_suggest;
64
65 // The field in the address for which the suggestion should be made. For
66 // example, ADMIN_AREA in US means the suggestion should be made for the field
67 // labeled "State".
68 AddressField matching_address_field;
69
70 // True if the key of the region matches user input (the name may or may not
71 // match). "CA" should be suggested for a ("CA", "California") region.
72 //
73 // False if only the name of the region matches user input (key does not
74 // match). "California" should be suggested fir a ("CA", "California") region.
75 bool region_key_matches;
76 };
77
78 // Suggestions for an address. Contains lists of suggestions for administrative
79 // area, locality, and dependent locality fields of an address.
80 class AddressSuggestions {
81 public:
82 AddressSuggestions() {
83 for (int i = ADMIN_AREA; i <= LOCALITY; ++i) {
84 regions_match_input_[static_cast<AddressField>(i)] =
85 new RegionsMatchInput;
86 }
87 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
88 suggestions_[static_cast<AddressField>(i)] = new ScopedVector<Suggestion>;
89 }
90
91 ~AddressSuggestions() {
92 STLDeleteValues(&regions_match_input_);
93 STLDeleteValues(&suggestions_);
94 }
95
96 // Marks all regions at |address_field| level as matching user input.
97 void AllRegionsMatchForField(AddressField address_field) {
98 all_regions_match_input_.insert(address_field);
99 }
100
101 // Marks given regions at |address_field| level as matching user input. The
102 // |regions_match_key| parameter contains the regions that match user input by
103 // their keys. The |regions_match_name| parameter contains the regions that
104 // match user input by their names.
105 //
106 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
107 // DEPENDENT_LOCALITY.
108 bool AddRegions(AddressField address_field,
109 const std::set<const RegionData*>& regions_match_key,
110 const std::set<const RegionData*>& regions_match_name) {
111 DCHECK(address_field >= ADMIN_AREA);
112 DCHECK(address_field <= DEPENDENT_LOCALITY);
113
114 AddressField parent_address_field =
115 static_cast<AddressField>(address_field - 1);
116
117 bool all_parents_match =
118 parent_address_field == COUNTRY ||
119 all_regions_match_input_.find(parent_address_field) !=
120 all_regions_match_input_.end();
121
122 // Cannot build |address_field| level suggestions if there are no matches in
123 // |parent_address_field| level regions.
124 const RegionsMatchInput* parents = NULL;
125 if (address_field > ADMIN_AREA && !all_parents_match) {
126 parents = regions_match_input_[parent_address_field];
127 if (parents->keys.empty() && parents->names.empty())
128 return false;
129 }
130
131 RegionsMatchInput* regions = NULL;
132 if (address_field < DEPENDENT_LOCALITY)
133 regions = regions_match_input_[address_field];
134
135 ScopedVector<Suggestion>* suggestions = suggestions_[address_field];
136 bool added_suggestions = false;
137
138 // Iterate over both |regions_match_key| and |regions_match_name| and build
139 // Suggestion objects based on the given RegionData objects. Advance either
140 // one iterator at a time (if they point to different data) or both
141 // iterators at once (if they point to the same data).
142 for (std::set<const RegionData*>::const_iterator
143 key_it = regions_match_key.begin(),
144 name_it = regions_match_name.begin();
145 key_it != regions_match_key.end() ||
146 name_it != regions_match_name.end();) {
147 const RegionData* key_region =
148 key_it != regions_match_key.end() ? *key_it : NULL;
149 const RegionData* name_region =
150 name_it != regions_match_name.end() ? *name_it : NULL;
151
152 // Regions that do not have a parent that also matches input will not
153 // become a suggestion.
154 bool key_region_has_parent =
155 all_parents_match ||
156 (parents && !parents->keys.empty() && key_region &&
157 parents->keys.find(&key_region->parent()) != parents->keys.end());
158 bool name_region_has_parent =
159 all_parents_match ||
160 (parents && !parents->names.empty() && name_region &&
161 parents->names.find(&name_region->parent()) != parents->names.end());
162
163 if (name_region && (!key_region || name_region < key_region)) {
164 if (name_region_has_parent) {
165 suggestions->push_back(
166 new Suggestion(name_region, address_field, false));
167 added_suggestions = true;
168 if (regions)
169 regions->names.insert(name_region);
170 }
171
172 ++name_it;
173 } else if (key_region && (!name_region || key_region < name_region)) {
174 if (key_region_has_parent) {
175 suggestions->push_back(
176 new Suggestion(key_region, address_field, true));
177 added_suggestions = true;
178 if (regions)
179 regions->keys.insert(key_region);
180 }
181
182 ++key_it;
183 } else {
184 if (key_region_has_parent) {
185 suggestions->push_back(
186 new Suggestion(key_region, address_field, true));
187 added_suggestions = true;
188 if (regions) {
189 regions->keys.insert(key_region);
190 regions->names.insert(name_region);
191 }
192 }
193
194 ++key_it;
195 ++name_it;
196 }
197 }
198
199 return added_suggestions;
200 }
201
202 // Swaps the suggestions for the smallest sub-region into |suggestions|. This
203 // object is not usable after this call due to using the swap() operation.
204 //
205 // The |suggestions| parameter should not be NULL.
206 void SwapSmallestSubRegionSuggestions(ScopedVector<Suggestion>* suggestions) {
207 DCHECK(suggestions);
208 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
209 ScopedVector<Suggestion>* result =
210 suggestions_[static_cast<AddressField>(i)];
211 if (!result->empty()) {
212 result->swap(*suggestions);
213 return;
214 }
215 }
216 }
217
218 private:
219 // The sets of non-owned regions used for looking up regions that match user
220 // input by keys and names.
221 struct RegionsMatchInput {
222 std::set<const RegionData*> keys;
223 std::set<const RegionData*> names;
224 };
225
226 // The owned sets of regions that match user input at ADMIN_AREA and LOCALITY
227 // levels.
228 std::map<AddressField, RegionsMatchInput*> regions_match_input_;
229
230 // The set of fields for which all regions match user input. Used to avoid
231 // storing a long list in |regions_match_input_| and later looking it up
232 // there.
233 std::set<AddressField> all_regions_match_input_;
234
235 // The owned vectors of suggestions at ADMIN_AREA, LOCALITY, and
236 // DEPENDENT_LOCALITY levels.
237 std::map<AddressField, ScopedVector<Suggestion>*> suggestions_;
238
239 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
240 };
241
242 } // namespace
243
244 // Canonicalizes strings for case and diacritic insensitive comparison.
245 class StringCanonicalizer {
246 public:
247 // Initializes the canonicalizer. This is slow, so avoid calling it more often
248 // than necessary.
249 StringCanonicalizer() : buffer_size_(0) {
250 UErrorCode error_code = U_ZERO_ERROR;
251 collator_.reset(
252 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
253 DCHECK(U_SUCCESS(error_code));
254 collator_->setStrength(icu::Collator::PRIMARY);
255 }
256
257 ~StringCanonicalizer() {}
258
259 // Returns a canonical version of the string that can be used for comparing
260 // strings regardless of diacritics and capitalization.
261 // Canonicalize("Texas") == Canonicalize("T\u00E9xas");
262 // Canonicalize("Texas") == Canonicalize("teXas");
263 // Canonicalize("Texas") != Canonicalize("California");
264 //
265 // The output is not human-readable.
266 // Canonicalize("Texas") != "Texas";
267 std::string Canonicalize(const std::string& original) const {
268 icu::UnicodeString icu_str(original.c_str(),
269 static_cast<int32_t>(original.length()));
270 int32_t required_size =
271 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
272 if (required_size > buffer_size_) {
273 buffer_size_ = required_size * 2;
274 buffer_.reset(new uint8_t[buffer_size_]);
275 required_size =
276 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
277 DCHECK_GE(buffer_size_, required_size);
278 }
279 return std::string(reinterpret_cast<const char*>(buffer_.get()));
280 }
281
282 private:
283 mutable int32_t buffer_size_;
284 mutable scoped_ptr<uint8_t[]> buffer_;
285 scoped_ptr<icu::Collator> collator_;
286
287 DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer);
288 };
289
290 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
291 // region name or key.
292 class InputSuggester::SubRegionData {
293 public:
294 // Adds the sub-regions of |country_region| into tries. Uses
295 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
296 // sub-regions.
297 SubRegionData(const RegionData& country_region,
298 const StringCanonicalizer& shared_canonicalizer)
299 : canonicalizer_(shared_canonicalizer), smallest_region_size_(COUNTRY) {
300 DCHECK(!country_region.has_parent());
301
302 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
303 field_tries_[static_cast<AddressField>(i)] = new FieldTries;
304
305 if (!country_region.sub_regions().empty())
306 AddSubRegionsOf(country_region, COUNTRY);
307 }
308
309 ~SubRegionData() { STLDeleteValues(&field_tries_); }
310
311 // Adds the suggestions for |user_input| into |suggestions| when user is
312 // typing in |focused_field|.
313 void BuildSuggestions(const AddressData& user_input,
314 AddressField focused_field,
315 ScopedVector<Suggestion>* suggestions) const {
316 // Do not suggest anything if there's no suggestion data for the focused
317 // field.
318 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
319 return;
320
321 // Non-owned regions that match a field value by region key.
322 std::set<const RegionData*> regions_match_key;
323
324 // Non-owned regions that match a field value by region name.
325 std::set<const RegionData*> regions_match_name;
326
327 AddressSuggestions address_suggestions;
328 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
329 ++i) {
330 AddressField address_field = static_cast<AddressField>(i);
331 AddressField parent_address_field = static_cast<AddressField>(i - 1);
332
333 const std::string& field_value = user_input.GetFieldValue(address_field);
334 const std::string& parent_field_value =
335 user_input.GetFieldValue(parent_address_field);
336
337 if (field_value.empty() &&
338 (address_field == ADMIN_AREA || parent_field_value.empty())) {
339 address_suggestions.AllRegionsMatchForField(address_field);
340 continue;
341 }
342
343 regions_match_key.clear();
344 regions_match_name.clear();
345
346 const std::string& canonical_field_value =
347 canonicalizer_.Canonicalize(field_value);
348 const FieldTries* field_tries = field_tries_.find(address_field)->second;
349 field_tries->keys.FindDataForKeyPrefix(canonical_field_value,
350 &regions_match_key);
351 field_tries->names.FindDataForKeyPrefix(canonical_field_value,
352 &regions_match_name);
353
354 bool added_suggestions = address_suggestions.AddRegions(
355 address_field, regions_match_key, regions_match_name);
356
357 // Do not suggest anything if the focused field does not have suggestions.
358 if (address_field == focused_field && !added_suggestions)
359 return;
360 }
361
362 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
363 }
364
365 private:
366 // The tries to lookup regions for a specific field by keys and names. For
367 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
368 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
369 // struct is uncopyable due to Trie objects being uncopyable.
370 struct FieldTries {
371 Trie<const RegionData*> keys;
372 Trie<const RegionData*> names;
373 };
374
375 // Adds the sub-regions of |parent_region| into tries.
376 void AddSubRegionsOf(const RegionData& parent_region,
377 AddressField parent_field) {
378 DCHECK(!parent_region.sub_regions().empty());
379
380 AddressField address_field = static_cast<AddressField>(parent_field + 1);
381 DCHECK(address_field >= ADMIN_AREA);
382 DCHECK(address_field <= DEPENDENT_LOCALITY);
383
384 FieldTries* field_tries = field_tries_[address_field];
385 for (std::vector<const RegionData*>::const_iterator it =
386 parent_region.sub_regions().begin();
387 it != parent_region.sub_regions().end();
388 ++it) {
389 const RegionData* region = *it;
390 DCHECK(region);
391
392 field_tries->keys.AddDataForKey(
393 canonicalizer_.Canonicalize(region->key()), region);
394 field_tries->names.AddDataForKey(
395 canonicalizer_.Canonicalize(region->name()), region);
396
397 if (smallest_region_size_ < address_field)
398 smallest_region_size_ = address_field;
399
400 if (!region->sub_regions().empty())
401 AddSubRegionsOf(*region, address_field);
402 }
403 }
404
405 // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and
406 // DEPENDENT_LOCALITY.
407 std::map<AddressField, FieldTries*> field_tries_;
408
409 // The smallest size of a sub-region that has data. For example, this is
410 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
411 AddressField smallest_region_size_;
412
413 // A shared instance of string canonicalizer for case and diacritic comparison
414 // of region keys and names.
415 const StringCanonicalizer& canonicalizer_;
416
417 DISALLOW_COPY_AND_ASSIGN(SubRegionData);
418 };
419
420 InputSuggester::InputSuggester(PreloadSupplier* supplier)
421 : region_data_builder_(supplier),
422 input_helper_(supplier),
423 validator_(supplier),
424 validated_(BuildCallback(this, &InputSuggester::Validated)) {}
425
426 InputSuggester::~InputSuggester() {
427 STLDeleteValues(&sub_regions_);
428 }
429
430 void InputSuggester::GetSuggestions(const AddressData& user_input,
431 AddressField focused_field,
432 size_t suggestions_limit,
433 std::vector<AddressData>* suggestions) {
434 DCHECK(suggestions);
435 DCHECK(focused_field == POSTAL_CODE ||
436 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
437
438 AddressData address_copy = user_input;
439
440 // Do not suggest anything if the user input is empty.
441 if (address_copy.IsFieldEmpty(focused_field))
442 return;
443
444 if (focused_field == POSTAL_CODE) {
445 // Do not suggest anything if the user is typing an invalid postal code.
446 FieldProblemMap problems;
447 FieldProblemMap filter;
448 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
449 validator_.Validate(address_copy,
450 true, // Allow postal office boxes.
451 false, // Do not require recipient name.
452 &filter,
453 &problems,
454 *validated_);
455 if (!problems.empty())
456 return;
457
458 // Fill in the sub-regions based on the postal code.
459 input_helper_.FillAddress(&address_copy);
460 }
461
462 // Lazily initialize the mapping from COUNTRY level regions to all of their
463 // sub-regions with metadata for generating suggestions.
464 std::string unused_best_language;
465 const RegionData& region_data =
466 region_data_builder_.Build(address_copy.region_code,
467 address_copy.language_code,
468 &unused_best_language);
469 std::map<const RegionData*, const SubRegionData*>::iterator
470 sub_region_data_it = sub_regions_.find(&region_data);
471 if (sub_region_data_it == sub_regions_.end()) {
472 if (!canonicalizer_) {
473 canonicalizer_.reset(new StringCanonicalizer);
474 }
475 sub_region_data_it =
476 sub_regions_.insert(std::make_pair(&region_data,
477 new SubRegionData(region_data,
478 *canonicalizer_)))
479 .first;
480 }
481 DCHECK(sub_region_data_it->second);
482
483 // Build the list of regions that match |address_copy| when the user is typing
484 // in the |focused_field|.
485 ScopedVector<Suggestion> suggested_regions;
486 sub_region_data_it->second->BuildSuggestions(
487 address_copy, focused_field, &suggested_regions);
488
489 FieldProblemMap problems;
490 FieldProblemMap filter;
491 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
492
493 // Generate suggestions based on the regions.
494 for (ScopedVector<Suggestion>::const_iterator suggested_region_it =
495 suggested_regions.begin();
496 suggested_region_it != suggested_regions.end();
497 ++suggested_region_it) {
498 Suggestion* suggested_region = *suggested_region_it;
499
500 AddressData address;
501 address.region_code = address_copy.region_code;
502 address.postal_code = address_copy.postal_code;
503
504 // Traverse the tree of regions from the smallest |region_to_suggest| to the
505 // country-wide "root" of the tree. Use the region names or keys found at
506 // each of the levels of the tree to build the |address| to suggest.
507 AddressField address_field = suggested_region->matching_address_field;
508 for (const RegionData* region = suggested_region->region_to_suggest;
509 region->has_parent();
510 region = &region->parent()) {
511 address.SetFieldValue(address_field,
512 suggested_region->region_key_matches
513 ? region->key()
514 : region->name());
515 address_field = static_cast<AddressField>(address_field - 1);
516 }
517
518 // Do not suggest an address with a mismatching postal code.
519 problems.clear();
520 validator_.Validate(address_copy,
521 true, // Allow postal office boxes.
522 false, // Do not require recipient name.
523 &filter,
524 &problems,
525 *validated_);
526 if (!problems.empty())
527 continue;
528
529 // Do not add more suggestions than |suggestions_limit|.
530 if (suggestions->size() >= suggestions_limit) {
531 suggestions->clear();
532 return;
533 }
534
535 suggestions->push_back(address);
536 }
537 }
538
539 void InputSuggester::Validated(bool success,
540 const AddressData&,
541 const FieldProblemMap&) {
542 DCHECK(success);
543 }
544
545 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698