Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Whitelist strings used on all platforms. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/scoped_vector.h"
15 #include "base/stl_util.h"
16 #include "third_party/icu/source/i18n/unicode/coll.h"
17 #include "third_party/libaddressinput/chromium/trie.h"
18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data_builder.h"
23
24 namespace autofill {
25
26 using ::i18n::addressinput::AddressData;
27 using ::i18n::addressinput::AddressField;
28 using ::i18n::addressinput::BuildCallback;
29 using ::i18n::addressinput::FieldProblemMap;
30 using ::i18n::addressinput::PreloadSupplier;
31 using ::i18n::addressinput::RegionData;
32 using ::i18n::addressinput::RegionDataBuilder;
33
34 using ::i18n::addressinput::ADMIN_AREA;
35 using ::i18n::addressinput::COUNTRY;
36 using ::i18n::addressinput::DEPENDENT_LOCALITY;
37 using ::i18n::addressinput::LOCALITY;
38 using ::i18n::addressinput::POSTAL_CODE;
39
40 using ::i18n::addressinput::INVALID_FORMAT;
41 using ::i18n::addressinput::MISMATCHING_VALUE;
42
43 namespace {
44
45 // A region and its metadata useful for constructing a suggestion. The object is
46 // immutable and uncopyable.
47 struct Suggestion {
48 public:
49 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
50 // |region_to_suggest|, which should not be NULL. At least one of
51 // |region_key_matches| and |region_name_names| should be true, otherwise it's
52 // not a valid suggestion.
53 Suggestion(const RegionData* region_to_suggest,
54 AddressField matching_address_field,
55 bool region_key_matches,
56 bool region_name_matches)
57 : region_to_suggest(region_to_suggest),
58 matching_address_field(matching_address_field),
59 region_key_matches(region_key_matches),
60 region_name_matches(region_name_matches) {
61 DCHECK(region_to_suggest);
62 DCHECK(region_key_matches || region_name_matches);
63 }
64
65 ~Suggestion() {}
66
67 // The region that should be suggested. For example, if
68 // |region_to_suggest->name()| is "California", then "California" or "CA"
69 // can be suggested.
70 const RegionData* const region_to_suggest;
71
72 // The field in the address for which the suggestion should be made. For
73 // example, ADMIN_AREA in US means the suggestion should be made for the field
74 // labeled "State".
75 const AddressField matching_address_field;
76
77 // True if the the key of the region matches user input and can be used in
78 // suggestion. For example, if this is true and |region_to_suggest->key()| is
79 // "CA", then "CA" cab be suggested.
80 const bool region_key_matches;
81
82 // True if the name of the region matches user input and can be used in
83 // suggestion. For example, if this is true and |region_to_suggest->name()| is
84 // "California", then "California" can be suggested.
85 const bool region_name_matches;
Evan Stade 2014/06/25 03:37:16 this isn't used anywhere
please use gerrit instead 2014/06/25 20:41:15 Removed this var and expanded the description of "
86 };
87
88 // Suggestions for an address. Contains lists of suggestions for administrative
89 // area, locality, and dependent locality fields of an address.
//
// Levels are expected to be fed in through AddRegions() from the largest
// (ADMIN_AREA) to the smallest (DEPENDENT_LOCALITY): the matches recorded for
// one level are used to filter the regions offered for the next level.
//
// NOTE(review): the integer loops and the "address_field - 1" arithmetic below
// assume that COUNTRY, ADMIN_AREA, LOCALITY and DEPENDENT_LOCALITY are
// consecutive, increasing AddressField enumerators -- confirm against the
// AddressField declaration.
90 class AddressSuggestions {
91 public:
// Allocates an empty match record for ADMIN_AREA and LOCALITY and an empty
// suggestion vector for ADMIN_AREA through DEPENDENT_LOCALITY.
92 AddressSuggestions() {
93 for (int i = ADMIN_AREA; i <= LOCALITY; ++i) {
94 regions_match_input_[static_cast<AddressField>(i)] =
95 new RegionsMatchInput;
96 }
97 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
98 suggestions_[static_cast<AddressField>(i)] = new ScopedVector<Suggestion>;
99 }
100
// Releases the map values that the constructor allocated with new.
101 ~AddressSuggestions() {
102 STLDeleteValues(&regions_match_input_);
103 STLDeleteValues(&suggestions_);
104 }
105
106 // Marks all regions at |address_field| level as matching user input.
// AddRegions() for the next smaller level then accepts every region without
// looking up its parent.
107 void AllRegionsMatchForField(AddressField address_field) {
108 all_regions_match_input_.insert(address_field);
109 }
110
111 // Marks given regions at |address_field| level as matching user input. The
112 // |regions_match_key| parameter contains the regions that match user input by
113 // their keys. The |regions_match_name| parameter contains the regions that
114 // match user input by their names.
115 //
116 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
117 // DEPENDENT_LOCALITY.
//
// Returns true if at least one Suggestion was appended for |address_field|.
// Returns false without adding anything when the parent level recorded no
// matches at all.
118 bool AddRegions(AddressField address_field,
119 const std::set<const RegionData*>& regions_match_key,
120 const std::set<const RegionData*>& regions_match_name) {
121 DCHECK(address_field >= ADMIN_AREA);
122 DCHECK(address_field <= DEPENDENT_LOCALITY);
123
124 AddressField parent_address_field =
125 static_cast<AddressField>(address_field - 1);
126
// Parent filtering is skipped when the parent is the country itself or when
// the whole parent level was marked through AllRegionsMatchForField().
127 bool all_parents_match =
128 parent_address_field == COUNTRY ||
129 all_regions_match_input_.find(parent_address_field) !=
130 all_regions_match_input_.end();
131
132 // Cannot build |address_field| level suggestions if there are no matches in
133 // |parent_address_field| level regions.
134 const RegionsMatchInput* parents = NULL;
135 if (address_field > ADMIN_AREA && !all_parents_match) {
136 parents = regions_match_input_[parent_address_field];
137 if (parents->keys.empty() && parents->names.empty())
138 return false;
139 }
140
// Matches are only recorded for levels that can themselves be a parent, so
// |regions| stays NULL for DEPENDENT_LOCALITY.
141 RegionsMatchInput* regions = NULL;
142 if (address_field < DEPENDENT_LOCALITY)
143 regions = regions_match_input_[address_field];
144
145 ScopedVector<Suggestion>* suggestions = suggestions_[address_field];
146 bool added_suggestions = false;
147
148 // Iterate over both |regions_match_key| and |regions_match_name| and build
149 // Suggestion objects based on the given RegionData objects. Advance either
150 // one iterator at a time (if they point to different data) or both
151 // iterators at once (if they point to the same data).
//
// Both inputs are std::set<const RegionData*>, so each is visited in ascending
// pointer order and the pointer comparisons below drive a sorted merge.
152 for (std::set<const RegionData*>::const_iterator
153 key_it = regions_match_key.begin(),
154 name_it = regions_match_name.begin();
155 key_it != regions_match_key.end() ||
156 name_it != regions_match_name.end();) {
// NULL stands for "this set is exhausted".
157 const RegionData* key_region =
158 key_it != regions_match_key.end() ? *key_it : NULL;
159 const RegionData* name_region =
160 name_it != regions_match_name.end() ? *name_it : NULL;
161
162 // Regions that do not have a parent that also matches input will not
163 // become a suggestion.
// A key match is checked against the parents that matched by key, and a name
// match against the parents that matched by name.
164 bool key_region_has_parent =
165 all_parents_match ||
166 (parents && !parents->keys.empty() && key_region &&
167 parents->keys.find(&key_region->parent()) != parents->keys.end());
168 bool name_region_has_parent =
169 all_parents_match ||
170 (parents && !parents->names.empty() && name_region &&
171 parents->names.find(&name_region->parent()) != parents->names.end());
172
// Case 1: the name-only region comes first in pointer order.
173 if (name_region && (!key_region || name_region < key_region)) {
174 if (name_region_has_parent) {
175 suggestions->push_back(
176 new Suggestion(name_region, address_field, false, true));
177 added_suggestions = true;
178 if (regions)
179 regions->names.insert(name_region);
180 }
181
182 ++name_it;
// Case 2: the key-only region comes first in pointer order.
183 } else if (key_region && (!name_region || key_region < name_region)) {
184 if (key_region_has_parent) {
185 suggestions->push_back(
186 new Suggestion(key_region, address_field, true, false));
187 added_suggestions = true;
188 if (regions)
189 regions->keys.insert(key_region);
190 }
191
192 ++key_it;
// Case 3: both iterators refer to the same region, which matched by key and by
// name. Only |key_region_has_parent| is consulted here.
193 } else {
194 if (key_region_has_parent) {
195 suggestions->push_back(
196 new Suggestion(key_region, address_field, true, true));
197 added_suggestions = true;
198 if (regions) {
199 regions->keys.insert(key_region);
200 regions->names.insert(name_region);
201 }
202 }
203
204 ++key_it;
205 ++name_it;
206 }
207 }
208
209 return added_suggestions;
210 }
211
212 // Swaps the suggestions for the smallest sub-region into |suggestions|. This
213 // object is not usable after this call due to using the swap() operation.
214 //
215 // The |suggestions| parameter should not be NULL.
//
// Levels are probed from DEPENDENT_LOCALITY up to ADMIN_AREA and the first
// non-empty vector is swapped out. If every level is empty, |suggestions| is
// left untouched.
216 void SwapSmallestSubRegionSuggestions(ScopedVector<Suggestion>* suggestions) {
217 DCHECK(suggestions);
218 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
219 ScopedVector<Suggestion>* result =
220 suggestions_[static_cast<AddressField>(i)];
221 if (!result->empty()) {
222 result->swap(*suggestions);
223 return;
224 }
225 }
226 }
227
228 private:
229 // The sets of non-owned regions used for looking up regions that match user
230 // input by keys and names.
231 struct RegionsMatchInput {
232 std::set<const RegionData*> keys;
233 std::set<const RegionData*> names;
234 };
235
236 // The owned sets of regions that match user input at ADMIN_AREA and LOCALITY
237 // levels.
238 std::map<AddressField, RegionsMatchInput*> regions_match_input_;
239
240 // The set of fields for which all regions match user input. Used to avoid
241 // storing a long list in |regions_match_input_| and later looking it up
242 // there.
243 std::set<AddressField> all_regions_match_input_;
244
245 // The owned vectors of suggestions at ADMIN_AREA, LOCALITY, and
246 // DEPENDENT_LOCALITY levels.
247 std::map<AddressField, ScopedVector<Suggestion>*> suggestions_;
Evan Stade 2014/06/25 03:37:16 why ScopedVector* instead of ScopedVector? Why Sc
please use gerrit instead 2014/06/25 20:41:15 I'd rather not copy anything to avoid accidentally
Evan Stade 2014/06/26 00:46:15 Yes, the code is more complex (explicit memory man
please use gerrit instead 2014/06/26 22:49:49 Done.
248
249 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
250 };
251
252 } // namespace
253
254 // Canonicalizes strings for case and diacritic insensitive comparison.
255 class StringCanonicalizer {
256 public:
257 // Initializes the canonicalizer. This is slow, so avoid calling it more often
258 // than necessary.
259 StringCanonicalizer() {
260 UErrorCode error_code = U_ZERO_ERROR;
261 collator_.reset(
262 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
263 DCHECK(U_SUCCESS(error_code));
264 collator_->setStrength(icu::Collator::PRIMARY);
265 }
266
267 ~StringCanonicalizer() {}
268
269 // Returns a canonical version of the string that can be used for comparing
270 // strings regardless of diacritics and capitalization.
271 // Canonicalize("Texas") == Canonicalize("T\u00E9xas");
272 // Canonicalize("Texas") == Canonicalize("teXas");
273 // Canonicalize("Texas") != Canonicalize("California");
274 //
275 // The output is not human-readable.
276 // Canonicalize("Texas") != "Texas";
277 std::string Canonicalize(const std::string& original) const {
Evan Stade 2014/06/25 03:37:16 this function makes me queasy...
please use gerrit instead 2014/06/25 20:41:15 Changed to maintain a buffer between calls to Cano
Evan Stade 2014/06/26 00:46:15 Allocating the buffer isn't what worried me. It's
please use gerrit instead 2014/06/26 22:49:50 Done.
278 icu::UnicodeString icu_str(original.c_str(),
279 static_cast<int32_t>(original.length()));
280 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
281 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
282 DCHECK(buffer.get());
283 int32_t filled_size =
284 collator_->getSortKey(icu_str, buffer.get(), buffer_size);
285 DCHECK_EQ(buffer_size, filled_size);
286 return std::string(reinterpret_cast<const char*>(buffer.get()));
287 }
288
289 private:
290 scoped_ptr<icu::Collator> collator_;
291
292 DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer);
293 };
294
295 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
296 // region name or key.
297 class InputSuggester::SubRegionData {
298 public:
299 // Adds the sub-regions of |country_region| into tries. Uses
300 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
301 // sub-regions.
302 SubRegionData(const RegionData& country_region,
303 const StringCanonicalizer& shared_canonicalizer)
304 : canonicalizer_(shared_canonicalizer), smallest_region_size_(COUNTRY) {
305 DCHECK(!country_region.has_parent());
306
307 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
308 field_tries_[static_cast<AddressField>(i)] = new FieldTries;
309
310 if (!country_region.sub_regions().empty())
311 AddSubRegionsOf(country_region, COUNTRY);
312 }
313
314 ~SubRegionData() { STLDeleteValues(&field_tries_); }
315
316 // Adds the suggestions for |user_input| into |suggestions| when user is
317 // typing in |focused_field|.
318 void BuildSuggestions(const AddressData& user_input,
319 AddressField focused_field,
320 ScopedVector<Suggestion>* suggestions) const {
321 // Do not suggest anything if there's no suggestion data for the focused
322 // field.
323 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
324 return;
325
326 // Non-owned regions that match a field value by region key.
327 std::set<const RegionData*> regions_match_key;
328
329 // Non-owned regions that match a field value by region name.
330 std::set<const RegionData*> regions_match_name;
331
332 AddressSuggestions address_suggestions;
333 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
334 ++i) {
335 AddressField address_field = static_cast<AddressField>(i);
336 AddressField parent_address_field = static_cast<AddressField>(i - 1);
337
338 const std::string& field_value = user_input.GetFieldValue(address_field);
339 const std::string& parent_field_value =
340 user_input.GetFieldValue(parent_address_field);
341
342 if (field_value.empty() &&
343 (address_field == ADMIN_AREA || parent_field_value.empty())) {
344 address_suggestions.AllRegionsMatchForField(address_field);
345 continue;
346 }
347
348 regions_match_key.clear();
349 regions_match_name.clear();
350
351 const std::string& canonical_field_value =
352 canonicalizer_.Canonicalize(field_value);
353 const FieldTries* field_tries = field_tries_.find(address_field)->second;
354 field_tries->keys.FindDataForKeyPrefix(canonical_field_value,
355 &regions_match_key);
356 field_tries->names.FindDataForKeyPrefix(canonical_field_value,
357 &regions_match_name);
358
359 bool added_suggestions = address_suggestions.AddRegions(
360 address_field, regions_match_key, regions_match_name);
361
362 // Do not suggest anything if the focused field does not have suggestions.
363 if (address_field == focused_field && !added_suggestions)
364 return;
365 }
366
367 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
368 }
369
370 private:
371 // The tries to lookup regions for a specific field by keys and names. For
372 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
373 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
374 // struct is uncopyable due to Trie objects being uncopyable.
375 struct FieldTries {
376 Trie<const RegionData*> keys;
377 Trie<const RegionData*> names;
378 };
379
380 // Adds the sub-regions of |parent_region| into tries.
381 void AddSubRegionsOf(const RegionData& parent_region,
382 AddressField parent_field) {
383 DCHECK(!parent_region.sub_regions().empty());
384
385 AddressField address_field = static_cast<AddressField>(parent_field + 1);
386 DCHECK(address_field >= ADMIN_AREA);
387 DCHECK(address_field <= DEPENDENT_LOCALITY);
388
389 FieldTries* field_tries = field_tries_[address_field];
390 for (std::vector<const RegionData*>::const_iterator it =
391 parent_region.sub_regions().begin();
392 it != parent_region.sub_regions().end();
393 ++it) {
394 const RegionData* region = *it;
395 DCHECK(region);
396
397 field_tries->keys.AddDataForKey(
398 canonicalizer_.Canonicalize(region->key()), region);
399 field_tries->names.AddDataForKey(
400 canonicalizer_.Canonicalize(region->name()), region);
401
402 if (smallest_region_size_ < address_field)
403 smallest_region_size_ = address_field;
404
405 if (!region->sub_regions().empty())
406 AddSubRegionsOf(*region, address_field);
407 }
408 }
409
410 // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and
411 // DEPENDENT_LOCALITY.
412 std::map<AddressField, FieldTries*> field_tries_;
413
414 // The smallest size of a sub-region that has data. For example, this is
415 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
416 AddressField smallest_region_size_;
417
418 // A shared instance of string canonicalizer for case and diacritic comparison
419 // of region keys and names.
420 const StringCanonicalizer& canonicalizer_;
421
422 DISALLOW_COPY_AND_ASSIGN(SubRegionData);
423 };
424
425 InputSuggester::InputSuggester(PreloadSupplier* supplier)
426 : region_data_builder_(supplier),
427 input_helper_(supplier),
428 validator_(supplier),
429 validated_(BuildCallback(this, &InputSuggester::Validated)) {}
430
431 InputSuggester::~InputSuggester() {
432 STLDeleteValues(&sub_regions_);
433 }
434
435 void InputSuggester::GetSuggestions(const AddressData& user_input,
436 AddressField focused_field,
437 size_t suggestions_limit,
438 std::vector<AddressData>* suggestions) {
439 DCHECK(suggestions);
440 DCHECK(focused_field == POSTAL_CODE ||
441 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
442
443 AddressData address_copy = user_input;
444
445 // Do not suggest anything if the user input is empty.
446 if (address_copy.IsFieldEmpty(focused_field))
447 return;
448
449 if (focused_field == POSTAL_CODE) {
450 // Do not suggest anything if the user is typing an invalid postal code.
451 FieldProblemMap problems;
452 FieldProblemMap filter;
453 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
454 validator_.Validate(address_copy,
455 true, // Allow postal office boxes.
456 false, // Do not require recipient name.
457 &filter,
458 &problems,
459 *validated_);
460 if (!problems.empty())
461 return;
462
463 // Fill in the sub-regions based on the postal code.
464 input_helper_.FillAddress(&address_copy);
465 }
466
467 // Lazily initialize the mapping from COUNTRY level regions to all of their
468 // sub-regions with metadata for generating suggestions.
469 std::string unused_best_language;
470 const RegionData& region_data =
471 region_data_builder_.Build(address_copy.region_code,
472 address_copy.language_code,
473 &unused_best_language);
474 std::map<const RegionData*, const SubRegionData*>::iterator
475 sub_region_data_it = sub_regions_.find(&region_data);
476 if (sub_region_data_it == sub_regions_.end()) {
477 if (!canonicalizer_) {
478 canonicalizer_.reset(new StringCanonicalizer);
479 }
480 sub_region_data_it =
481 sub_regions_.insert(std::make_pair(&region_data,
482 new SubRegionData(region_data,
483 *canonicalizer_)))
484 .first;
485 }
486 DCHECK(sub_region_data_it->second);
487
488 // Build the list of regions that match |address_copy| when the user is typing
489 // in the |focused_field|.
490 ScopedVector<Suggestion> suggested_regions;
491 sub_region_data_it->second->BuildSuggestions(
492 address_copy, focused_field, &suggested_regions);
493
494 FieldProblemMap problems;
495 FieldProblemMap filter;
496 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
497
498 // Generate suggestions based on the regions.
499 for (ScopedVector<Suggestion>::const_iterator suggested_region_it =
500 suggested_regions.begin();
501 suggested_region_it != suggested_regions.end();
502 ++suggested_region_it) {
503 Suggestion* suggested_region = *suggested_region_it;
504
505 AddressData address;
506 address.region_code = address_copy.region_code;
507 address.postal_code = address_copy.postal_code;
508
509 // Traverse the tree of regions from the smallest |region_to_suggest| to the
510 // country-wide "root" of the tree. Use the region names or keys found at
511 // each of the levels of the tree to build the |address| to suggest.
512 AddressField address_field = suggested_region->matching_address_field;
513 for (const RegionData* region = suggested_region->region_to_suggest;
514 region->has_parent();
515 region = &region->parent()) {
516 address.SetFieldValue(address_field,
517 suggested_region->region_key_matches
518 ? region->key()
519 : region->name());
520 address_field = static_cast<AddressField>(address_field - 1);
521 }
522
523 // Do not suggest an address with a mismatching postal code.
524 problems.clear();
525 validator_.Validate(address_copy,
526 true, // Allow postal office boxes.
527 false, // Do not require recipient name.
528 &filter,
529 &problems,
530 *validated_);
531 if (!problems.empty())
532 continue;
533
534 // Do not add more suggestions than |suggestions_limit|.
535 if (suggestions->size() >= suggestions_limit) {
536 suggestions->clear();
537 return;
538 }
539
540 suggestions->push_back(address);
541 }
542 }
543
544 void InputSuggester::Validated(bool success,
545 const AddressData&,
546 const FieldProblemMap&) {
547 DCHECK(success);
548 }
549
550 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698