Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(472)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Specify the string size in canonicalizer. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/stl_util.h"
15 #include "third_party/icu/source/i18n/unicode/coll.h"
16 #include "third_party/libaddressinput/chromium/trie.h"
17 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data_builder.h"
22
23 namespace autofill {
24
25 using ::i18n::addressinput::AddressData;
26 using ::i18n::addressinput::AddressField;
27 using ::i18n::addressinput::BuildCallback;
28 using ::i18n::addressinput::FieldProblemMap;
29 using ::i18n::addressinput::PreloadSupplier;
30 using ::i18n::addressinput::RegionData;
31 using ::i18n::addressinput::RegionDataBuilder;
32
33 using ::i18n::addressinput::ADMIN_AREA;
34 using ::i18n::addressinput::COUNTRY;
35 using ::i18n::addressinput::DEPENDENT_LOCALITY;
36 using ::i18n::addressinput::LOCALITY;
37 using ::i18n::addressinput::POSTAL_CODE;
38
39 using ::i18n::addressinput::INVALID_FORMAT;
40 using ::i18n::addressinput::MISMATCHING_VALUE;
41
42 namespace {
43
44 // A region and its metadata useful for constructing a suggestion.
45 struct Suggestion {
46 public:
47 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
48 // |region_to_suggest|, which should not be NULL.
49 Suggestion(const RegionData* region_to_suggest,
50 AddressField matching_address_field,
51 bool region_key_matches)
52 : region_to_suggest(region_to_suggest),
53 matching_address_field(matching_address_field),
54 region_key_matches(region_key_matches) {
55 DCHECK(region_to_suggest);
56 }
57
58 ~Suggestion() {}
59
60 // The region that should be suggested. For example, if the region is ("CA",
61 // "California"), then either "CA" or "California" should be suggested.
62 const RegionData* region_to_suggest;
63
64 // The field in the address for which the suggestion should be made. For
65 // example, ADMIN_AREA in US means the suggestion should be made for the field
66 // labeled "State".
67 AddressField matching_address_field;
68
69 // True if the key of the region matches user input (the name may or may not
70 // match). "CA" should be suggested for a ("CA", "California") region.
71 //
72 // False if only the name of the region matches user input (key does not
73 // match). "California" should be suggested fir a ("CA", "California") region.
74 bool region_key_matches;
75 };
76
77 // Suggestions for an address. Contains lists of suggestions for administrative
78 // area, locality, and dependent locality fields of an address.
79 class AddressSuggestions {
80 public:
81 AddressSuggestions() {
82 for (int i = ADMIN_AREA; i <= LOCALITY; ++i) {
83 regions_match_input_[static_cast<AddressField>(i)] =
84 new RegionsMatchInput;
85 }
86 }
87
88 ~AddressSuggestions() { STLDeleteValues(&regions_match_input_); }
89
90 // Marks all regions at |address_field| level as matching user input.
91 void AllRegionsMatchForField(AddressField address_field) {
92 all_regions_match_input_.insert(address_field);
93 }
94
95 // Marks given regions at |address_field| level as matching user input. The
96 // |regions_match_key| parameter contains the regions that match user input by
97 // their keys. The |regions_match_name| parameter contains the regions that
98 // match user input by their names.
99 //
100 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
101 // DEPENDENT_LOCALITY.
102 bool AddRegions(AddressField address_field,
103 const std::set<const RegionData*>& regions_match_key,
104 const std::set<const RegionData*>& regions_match_name) {
105 DCHECK(address_field >= ADMIN_AREA);
106 DCHECK(address_field <= DEPENDENT_LOCALITY);
107
108 AddressField parent_address_field =
109 static_cast<AddressField>(address_field - 1);
110
111 bool all_parents_match =
112 parent_address_field == COUNTRY ||
113 all_regions_match_input_.find(parent_address_field) !=
114 all_regions_match_input_.end();
115
116 // Cannot build |address_field| level suggestions if there are no matches in
117 // |parent_address_field| level regions.
118 const RegionsMatchInput* parents = NULL;
119 if (address_field > ADMIN_AREA && !all_parents_match) {
120 parents = regions_match_input_[parent_address_field];
121 if (parents->keys.empty() && parents->names.empty())
122 return false;
123 }
124
125 RegionsMatchInput* regions = NULL;
126 if (address_field < DEPENDENT_LOCALITY)
127 regions = regions_match_input_[address_field];
128
129 std::vector<Suggestion>& suggestions = suggestions_[address_field];
130 bool added_suggestions = false;
131
132 // Iterate over both |regions_match_key| and |regions_match_name| and build
133 // Suggestion objects based on the given RegionData objects. Advance either
134 // one iterator at a time (if they point to different data) or both
135 // iterators at once (if they point to the same data).
136 for (std::set<const RegionData*>::const_iterator
137 key_it = regions_match_key.begin(),
138 name_it = regions_match_name.begin();
139 key_it != regions_match_key.end() ||
140 name_it != regions_match_name.end();) {
141 const RegionData* key_region =
142 key_it != regions_match_key.end() ? *key_it : NULL;
143 const RegionData* name_region =
144 name_it != regions_match_name.end() ? *name_it : NULL;
145
146 // Regions that do not have a parent that also matches input will not
147 // become a suggestion.
148 bool key_region_has_parent =
149 all_parents_match ||
150 (parents && !parents->keys.empty() && key_region &&
151 parents->keys.find(&key_region->parent()) != parents->keys.end());
152 bool name_region_has_parent =
153 all_parents_match ||
154 (parents && !parents->names.empty() && name_region &&
155 parents->names.find(&name_region->parent()) != parents->names.end());
156
157 if (name_region && (!key_region || name_region < key_region)) {
158 if (name_region_has_parent) {
159 suggestions.push_back(Suggestion(name_region, address_field, false));
160 added_suggestions = true;
161 if (regions)
162 regions->names.insert(name_region);
163 }
164
165 ++name_it;
166 } else if (key_region && (!name_region || key_region < name_region)) {
167 if (key_region_has_parent) {
168 suggestions.push_back(Suggestion(key_region, address_field, true));
169 added_suggestions = true;
170 if (regions)
171 regions->keys.insert(key_region);
172 }
173
174 ++key_it;
175 } else {
176 if (key_region_has_parent) {
177 suggestions.push_back(Suggestion(key_region, address_field, true));
178 added_suggestions = true;
179 if (regions) {
180 regions->keys.insert(key_region);
181 regions->names.insert(name_region);
182 }
183 }
184
185 ++key_it;
186 ++name_it;
187 }
188 }
189
190 return added_suggestions;
191 }
192
193 // Swaps the suggestions for the smallest sub-region into |suggestions|. This
194 // object is not usable after this call due to using the swap() operation.
Evan Stade 2014/06/27 01:26:58 nit: s/this object/|this|
please use gerrit instead 2014/06/27 08:43:38 Done.
195 //
196 // The |suggestions| parameter should not be NULL.
197 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
198 DCHECK(suggestions);
199 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
200 std::vector<Suggestion>& result =
201 suggestions_[static_cast<AddressField>(i)];
202 if (!result.empty()) {
203 result.swap(*suggestions);
Evan Stade 2014/06/27 01:26:58 nit: suggestions->swap(result), imo, is easier to
please use gerrit instead 2014/06/27 08:43:38 Done.
204 return;
205 }
206 }
207 }
208
209 private:
210 // The sets of non-owned regions used for looking up regions that match user
211 // input by keys and names.
212 struct RegionsMatchInput {
213 std::set<const RegionData*> keys;
214 std::set<const RegionData*> names;
215 };
216
217 // The owned sets of regions that match user input at ADMIN_AREA and LOCALITY
218 // levels.
219 std::map<AddressField, RegionsMatchInput*> regions_match_input_;
220
221 // The set of fields for which all regions match user input. Used to avoid
222 // storing a long list in |regions_match_input_| and later looking it up
223 // there.
224 std::set<AddressField> all_regions_match_input_;
225
226 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
227 std::map<AddressField, std::vector<Suggestion> > suggestions_;
228
229 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
230 };
231
232 } // namespace
233
234 // Canonicalizes strings for case and diacritic insensitive comparison.
235 class StringCanonicalizer {
236 public:
237 // Initializes the canonicalizer. This is slow, so avoid calling it more often
238 // than necessary.
239 StringCanonicalizer() : buffer_size_(0) {
Evan Stade 2014/06/27 01:26:58 should be initialized to something greater than 0
please use gerrit instead 2014/06/27 08:43:38 Done.
240 UErrorCode error_code = U_ZERO_ERROR;
241 collator_.reset(
242 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
243 DCHECK(U_SUCCESS(error_code));
244 collator_->setStrength(icu::Collator::PRIMARY);
245 }
246
247 ~StringCanonicalizer() {}
248
249 // Returns a canonical version of the string that can be used for comparing
250 // strings regardless of diacritics and capitalization.
251 // Canonicalize("Texas") == Canonicalize("T\u00E9xas");
252 // Canonicalize("Texas") == Canonicalize("teXas");
253 // Canonicalize("Texas") != Canonicalize("California");
254 //
255 // The output is not human-readable.
256 // Canonicalize("Texas") != "Texas";
257 std::string Canonicalize(const std::string& original) const {
258 DCHECK(!original.empty());
259 icu::UnicodeString icu_str(original.c_str(), original.length());
260 int32_t sort_key_size =
261 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
262 DCHECK_LT(0, sort_key_size);
263 if (sort_key_size > buffer_size_) {
264 buffer_size_ = sort_key_size * 2;
265 buffer_.reset(new uint8_t[buffer_size_]);
266 sort_key_size =
267 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
268 DCHECK_LT(0, sort_key_size);
269 DCHECK_GT(buffer_size_, sort_key_size);
270 }
271 return std::string(reinterpret_cast<const char*>(buffer_.get()),
272 sort_key_size - 1);
273 }
274
275 private:
276 mutable int32_t buffer_size_;
277 mutable scoped_ptr<uint8_t[]> buffer_;
278 scoped_ptr<icu::Collator> collator_;
279
280 DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer);
281 };
282
283 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
284 // region name or key.
285 class InputSuggester::SubRegionData {
286 public:
287 // Adds the sub-regions of |country_region| into tries. Uses
288 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
289 // sub-regions.
290 SubRegionData(const RegionData& country_region,
291 const StringCanonicalizer& shared_canonicalizer)
292 : canonicalizer_(shared_canonicalizer), smallest_region_size_(COUNTRY) {
293 DCHECK(!country_region.has_parent());
294
295 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
296 field_tries_[static_cast<AddressField>(i)] = new FieldTries;
297
298 if (!country_region.sub_regions().empty())
299 AddSubRegionsOf(country_region, COUNTRY);
300 }
301
302 ~SubRegionData() { STLDeleteValues(&field_tries_); }
303
304 // Adds the suggestions for |user_input| into |suggestions| when user is
305 // typing in |focused_field|.
306 void BuildSuggestions(const AddressData& user_input,
307 AddressField focused_field,
308 std::vector<Suggestion>* suggestions) const {
309 // Do not suggest anything if there's no suggestion data for the focused
310 // field.
311 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
312 return;
313
314 // Non-owned regions that match a field value by region key.
315 std::set<const RegionData*> regions_match_key;
316
317 // Non-owned regions that match a field value by region name.
318 std::set<const RegionData*> regions_match_name;
319
320 AddressSuggestions address_suggestions;
321 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
322 ++i) {
323 AddressField address_field = static_cast<AddressField>(i);
324 AddressField parent_address_field = static_cast<AddressField>(i - 1);
325
326 const std::string& field_value = user_input.GetFieldValue(address_field);
327 const std::string& parent_field_value =
328 user_input.GetFieldValue(parent_address_field);
329
330 if (field_value.empty() &&
331 (address_field == ADMIN_AREA || parent_field_value.empty())) {
332 address_suggestions.AllRegionsMatchForField(address_field);
333 continue;
334 }
335
336 regions_match_key.clear();
337 regions_match_name.clear();
338
339 const std::string& canonical_field_value =
340 field_value.empty() ? field_value
341 : canonicalizer_.Canonicalize(field_value);
342 const FieldTries* field_tries = field_tries_.find(address_field)->second;
343 field_tries->keys.FindDataForKeyPrefix(canonical_field_value,
344 &regions_match_key);
345 field_tries->names.FindDataForKeyPrefix(canonical_field_value,
346 &regions_match_name);
347
348 bool added_suggestions = address_suggestions.AddRegions(
349 address_field, regions_match_key, regions_match_name);
350
351 // Do not suggest anything if the focused field does not have suggestions.
352 if (address_field == focused_field && !added_suggestions)
353 return;
354 }
355
356 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
357 }
358
359 private:
360 // The tries to lookup regions for a specific field by keys and names. For
361 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
362 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
363 // struct is uncopyable due to Trie objects being uncopyable.
364 struct FieldTries {
365 Trie<const RegionData*> keys;
366 Trie<const RegionData*> names;
367 };
368
369 // Adds the sub-regions of |parent_region| into tries.
370 void AddSubRegionsOf(const RegionData& parent_region,
371 AddressField parent_field) {
372 DCHECK(!parent_region.sub_regions().empty());
373
374 AddressField address_field = static_cast<AddressField>(parent_field + 1);
375 DCHECK(address_field >= ADMIN_AREA);
376 DCHECK(address_field <= DEPENDENT_LOCALITY);
377
378 FieldTries* field_tries = field_tries_[address_field];
379 for (std::vector<const RegionData*>::const_iterator it =
380 parent_region.sub_regions().begin();
381 it != parent_region.sub_regions().end();
382 ++it) {
383 const RegionData* region = *it;
384 DCHECK(region);
385
386 field_tries->keys.AddDataForKey(
387 canonicalizer_.Canonicalize(region->key()), region);
388 field_tries->names.AddDataForKey(
389 canonicalizer_.Canonicalize(region->name()), region);
390
391 if (smallest_region_size_ < address_field)
392 smallest_region_size_ = address_field;
393
394 if (!region->sub_regions().empty())
395 AddSubRegionsOf(*region, address_field);
396 }
397 }
398
399 // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and
400 // DEPENDENT_LOCALITY.
401 std::map<AddressField, FieldTries*> field_tries_;
402
403 // The smallest size of a sub-region that has data. For example, this is
404 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
405 AddressField smallest_region_size_;
406
407 // A shared instance of string canonicalizer for case and diacritic comparison
408 // of region keys and names.
409 const StringCanonicalizer& canonicalizer_;
410
411 DISALLOW_COPY_AND_ASSIGN(SubRegionData);
412 };
413
414 InputSuggester::InputSuggester(PreloadSupplier* supplier)
415 : region_data_builder_(supplier),
416 input_helper_(supplier),
417 validator_(supplier),
418 validated_(BuildCallback(this, &InputSuggester::Validated)) {}
419
420 InputSuggester::~InputSuggester() {
421 STLDeleteValues(&sub_regions_);
422 }
423
424 void InputSuggester::GetSuggestions(const AddressData& user_input,
425 AddressField focused_field,
426 size_t suggestions_limit,
427 std::vector<AddressData>* suggestions) {
428 DCHECK(suggestions);
429 DCHECK(focused_field == POSTAL_CODE ||
430 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
431
432 AddressData address_copy = user_input;
433
434 // Do not suggest anything if the user input is empty.
435 if (address_copy.IsFieldEmpty(focused_field))
436 return;
437
438 if (focused_field == POSTAL_CODE) {
439 // Do not suggest anything if the user is typing an invalid postal code.
440 FieldProblemMap problems;
441 FieldProblemMap filter;
442 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
443 validator_.Validate(address_copy,
444 true, // Allow postal office boxes.
445 false, // Do not require recipient name.
446 &filter,
447 &problems,
448 *validated_);
449 if (!problems.empty())
450 return;
451
452 // Fill in the sub-regions based on the postal code.
453 input_helper_.FillAddress(&address_copy);
454 }
455
456 // Lazily initialize the mapping from COUNTRY level regions to all of their
457 // sub-regions with metadata for generating suggestions.
458 std::string unused_best_language;
459 const RegionData& region_data =
460 region_data_builder_.Build(address_copy.region_code,
461 address_copy.language_code,
462 &unused_best_language);
463 std::map<const RegionData*, const SubRegionData*>::iterator
464 sub_region_data_it = sub_regions_.find(&region_data);
465 if (sub_region_data_it == sub_regions_.end()) {
466 if (!canonicalizer_) {
Evan Stade 2014/06/27 01:26:58 nit: no curlies
please use gerrit instead 2014/06/27 08:43:38 Done.
467 canonicalizer_.reset(new StringCanonicalizer);
468 }
469 sub_region_data_it =
470 sub_regions_.insert(std::make_pair(&region_data,
471 new SubRegionData(region_data,
472 *canonicalizer_)))
473 .first;
474 }
475 DCHECK(sub_region_data_it->second);
476
477 // Build the list of regions that match |address_copy| when the user is typing
478 // in the |focused_field|.
479 std::vector<Suggestion> suggested_regions;
480 sub_region_data_it->second->BuildSuggestions(
481 address_copy, focused_field, &suggested_regions);
482
483 FieldProblemMap problems;
484 FieldProblemMap filter;
485 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
486
487 // Generate suggestions based on the regions.
488 for (std::vector<Suggestion>::const_iterator suggested_region_it =
489 suggested_regions.begin();
490 suggested_region_it != suggested_regions.end();
491 ++suggested_region_it) {
492 AddressData address;
493 address.region_code = address_copy.region_code;
494 address.postal_code = address_copy.postal_code;
495
496 // Traverse the tree of regions from the smallest |region_to_suggest| to the
497 // country-wide "root" of the tree. Use the region names or keys found at
498 // each of the levels of the tree to build the |address| to suggest.
499 AddressField address_field = suggested_region_it->matching_address_field;
500 for (const RegionData* region = suggested_region_it->region_to_suggest;
501 region->has_parent();
502 region = &region->parent()) {
503 address.SetFieldValue(address_field,
504 suggested_region_it->region_key_matches
505 ? region->key()
506 : region->name());
507 address_field = static_cast<AddressField>(address_field - 1);
508 }
509
510 // Do not suggest an address with a mismatching postal code.
511 problems.clear();
512 validator_.Validate(address_copy,
Evan Stade 2014/06/27 01:26:58 these two short circuits (continue and return) ---
please use gerrit instead 2014/06/27 08:43:38 THank you for bring this to my attention! I notice
513 true, // Allow postal office boxes.
514 false, // Do not require recipient name.
515 &filter,
516 &problems,
517 *validated_);
518 if (!problems.empty())
519 continue;
520
521 // Do not add more suggestions than |suggestions_limit|.
522 if (suggestions->size() >= suggestions_limit) {
523 suggestions->clear();
524 return;
525 }
526
527 suggestions->push_back(address);
528 }
529 }
530
531 void InputSuggester::Validated(bool success,
532 const AddressData&,
533 const FieldProblemMap&) {
534 DCHECK(success);
535 }
536
537 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698