Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixup includes and comments for tries and ICU. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <set>
8 #include <utility>
9
10 #include "base/logging.h"
11 #include "third_party/libaddressinput/chromium/trie.h"
12 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
13 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
16
17 namespace autofill {
18
19 using ::i18n::addressinput::AddressData;
20 using ::i18n::addressinput::AddressField;
21 using ::i18n::addressinput::BuildCallback;
22 using ::i18n::addressinput::FieldProblemMap;
23 using ::i18n::addressinput::PreloadSupplier;
24 using ::i18n::addressinput::RegionData;
25 using ::i18n::addressinput::RegionDataBuilder;
26
27 using ::i18n::addressinput::ADMIN_AREA;
28 using ::i18n::addressinput::COUNTRY;
29 using ::i18n::addressinput::DEPENDENT_LOCALITY;
30 using ::i18n::addressinput::LOCALITY;
31 using ::i18n::addressinput::POSTAL_CODE;
32
33 using ::i18n::addressinput::INVALID_FORMAT;
34 using ::i18n::addressinput::MISMATCHING_VALUE;
35
36 namespace {
37
// Starting capacity, in bytes, of the scratch buffer that the string
// canonicalizer writes its results into. The canonicalizer replaces the buffer
// with a larger one when a result does not fit.
const int32_t kInitialBufferSize = 32;
40
41 // A region and its metadata useful for constructing a suggestion.
42 struct Suggestion {
43 public:
44 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
45 // |region_to_suggest|, which should not be NULL.
46 Suggestion(const RegionData* region_to_suggest,
47 AddressField matching_address_field,
48 bool region_key_matches)
49 : region_to_suggest(region_to_suggest),
50 matching_address_field(matching_address_field),
51 region_key_matches(region_key_matches) {
52 DCHECK(region_to_suggest);
53 }
54
55 ~Suggestion() {}
56
57 // The region that should be suggested. For example, if the region is ("CA",
58 // "California"), then either "CA" or "California" should be suggested.
59 const RegionData* region_to_suggest;
60
61 // The field in the address for which the suggestion should be made. For
62 // example, ADMIN_AREA in US means the suggestion should be made for the field
63 // labeled "State".
64 AddressField matching_address_field;
65
66 // True if the key of the region matches user input (the name may or may not
67 // match). "CA" should be suggested for a ("CA", "California") region.
68 //
69 // False if only the name of the region matches user input (the key does not
70 // match). "California" should be suggested for a ("CA", "California") region.
71 bool region_key_matches;
72 };
73
74 // Suggestions for an address. Contains lists of suggestions for administrative
75 // area, locality, and dependent locality fields of an address.
76 class AddressSuggestions {
77 public:
78 AddressSuggestions() {}
79 ~AddressSuggestions() {}
80
81 // Marks all regions at |address_field| level as matching user input.
82 void AllRegionsMatchForField(AddressField address_field) {
83 all_regions_match_input_.insert(address_field);
84 }
85
86 // Marks given regions at |address_field| level as matching user input. The
87 // |regions_match_key| parameter contains the regions that match user input by
88 // their keys. The |regions_match_name| parameter contains the regions that
89 // match user input by their names.
90 //
91 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
92 // DEPENDENT_LOCALITY.
93 bool AddRegions(AddressField address_field,
94 const std::set<const RegionData*>& regions_match_key,
95 const std::set<const RegionData*>& regions_match_name) {
96 DCHECK(address_field >= ADMIN_AREA);
97 DCHECK(address_field <= DEPENDENT_LOCALITY);
98
99 AddressField parent_address_field =
100 static_cast<AddressField>(address_field - 1);
101
102 bool all_parents_match =
103 parent_address_field == COUNTRY ||
104 all_regions_match_input_.find(parent_address_field) !=
105 all_regions_match_input_.end();
106
107 // Cannot build |address_field| level suggestions if there are no matches in
108 // |parent_address_field| level regions.
109 const RegionsMatchInput* parents = NULL;
110 if (address_field > ADMIN_AREA && !all_parents_match) {
111 parents = &regions_match_input_[parent_address_field];
112 if (parents->keys.empty() && parents->names.empty())
113 return false;
114 }
115
116 RegionsMatchInput* regions = NULL;
117 if (address_field < DEPENDENT_LOCALITY)
118 regions = &regions_match_input_[address_field];
119
120 std::vector<Suggestion>& suggestions = suggestions_[address_field];
121 bool added_suggestions = false;
122
123 // Iterate over both |regions_match_key| and |regions_match_name| and build
124 // Suggestion objects based on the given RegionData objects. Advance either
125 // one iterator at a time (if they point to different data) or both
126 // iterators at once (if they point to the same data).
127 for (std::set<const RegionData*>::const_iterator
128 key_it = regions_match_key.begin(),
129 name_it = regions_match_name.begin();
130 key_it != regions_match_key.end() ||
131 name_it != regions_match_name.end();) {
132 const RegionData* key_region =
133 key_it != regions_match_key.end() ? *key_it : NULL;
134 const RegionData* name_region =
135 name_it != regions_match_name.end() ? *name_it : NULL;
136
137 // Regions that do not have a parent that also matches input will not
138 // become a suggestion.
139 bool key_region_has_parent =
140 all_parents_match ||
141 (parents && !parents->keys.empty() && key_region &&
142 parents->keys.find(&key_region->parent()) != parents->keys.end());
143 bool name_region_has_parent =
144 all_parents_match ||
145 (parents && !parents->names.empty() && name_region &&
146 parents->names.find(&name_region->parent()) != parents->names.end());
147
148 if (name_region && (!key_region || name_region < key_region)) {
149 if (name_region_has_parent) {
150 suggestions.push_back(Suggestion(name_region, address_field, false));
151 added_suggestions = true;
152 if (regions)
153 regions->names.insert(name_region);
154 }
155
156 ++name_it;
157 } else if (key_region && (!name_region || key_region < name_region)) {
158 if (key_region_has_parent) {
159 suggestions.push_back(Suggestion(key_region, address_field, true));
160 added_suggestions = true;
161 if (regions)
162 regions->keys.insert(key_region);
163 }
164
165 ++key_it;
166 } else {
167 if (key_region_has_parent) {
168 suggestions.push_back(Suggestion(key_region, address_field, true));
169 added_suggestions = true;
170 if (regions) {
171 regions->keys.insert(key_region);
172 regions->names.insert(name_region);
173 }
174 }
175
176 ++key_it;
177 ++name_it;
178 }
179 }
180
181 return added_suggestions;
182 }
183
184 // Swaps the suggestions for the smallest sub-region into |suggestions|.
185 // |this| is not usable after this call due to using the swap() operation.
186 //
187 // The |suggestions| parameter should not be NULL.
188 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
189 DCHECK(suggestions);
190 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
191 std::vector<Suggestion>& result =
192 suggestions_[static_cast<AddressField>(i)];
193 if (!result.empty()) {
194 suggestions->swap(result);
195 return;
196 }
197 }
198 }
199
200 private:
201 // The sets of non-owned regions used for looking up regions that match user
202 // input by keys and names.
203 struct RegionsMatchInput {
204 std::set<const RegionData*> keys;
205 std::set<const RegionData*> names;
206 };
207
208 // The regions that match user input at ADMIN_AREA and LOCALITY levels.
209 std::map<AddressField, RegionsMatchInput> regions_match_input_;
210
211 // The set of fields for which all regions match user input. Used to avoid
212 // storing a long list in |regions_match_input_| and later looking it up
213 // there.
214 std::set<AddressField> all_regions_match_input_;
215
216 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
217 std::map<AddressField, std::vector<Suggestion> > suggestions_;
218
219 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
220 };
221
222 } // namespace
223
224 InputSuggester::StringCanonicalizer::StringCanonicalizer()
225 : buffer_size_(kInitialBufferSize), buffer_(new uint8_t[buffer_size_]) {
226 UErrorCode error_code = U_ZERO_ERROR;
227 collator_.reset(
228 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
229 DCHECK(U_SUCCESS(error_code));
230 collator_->setStrength(icu::Collator::PRIMARY);
231 }
232
233 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
234
235 const uint8_t* InputSuggester::StringCanonicalizer::Canonicalize(
236 const std::string& original,
237 int32_t* result_size) const {
238 DCHECK(!original.empty());
239 DCHECK(result_size);
240
241 icu::UnicodeString icu_str(original.c_str(), original.length());
242 int32_t sort_key_size =
243 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
244 DCHECK_LT(0, sort_key_size);
245
246 if (sort_key_size > buffer_size_) {
247 buffer_size_ = sort_key_size * 2;
248 buffer_.reset(new uint8_t[buffer_size_]);
249 sort_key_size = collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
250 DCHECK_LT(0, sort_key_size);
251 DCHECK_GT(buffer_size_, sort_key_size);
252 }
253
254 *result_size = sort_key_size - 1;
255 return buffer_.get();
256 }
257
258 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
259 // region name or key.
260 class InputSuggester::SubRegionData {
261 public:
262 SubRegionData()
263 : initialized_(false),
264 smallest_region_size_(COUNTRY),
265 canonicalizer_(NULL) {}
266
267 ~SubRegionData() {}
268
269 bool is_initialized() const { return initialized_; }
270
271 // Adds the sub-regions of |country_region| into tries. Uses
272 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
273 // sub-regions. Should be called at most once.
274 void Initialize(const RegionData& country_region,
275 const StringCanonicalizer& shared_canonicalizer) {
276 DCHECK(!initialized_);
277 DCHECK(!country_region.has_parent());
278
279 initialized_ = true;
280 canonicalizer_ = &shared_canonicalizer;
281
282 if (!country_region.sub_regions().empty())
283 AddSubRegionsOf(country_region, COUNTRY);
284 }
285
286 // Adds the suggestions for |user_input| into |suggestions| when user is
287 // typing in |focused_field|.
288 void BuildSuggestions(const AddressData& user_input,
289 AddressField focused_field,
290 std::vector<Suggestion>* suggestions) {
291 DCHECK(initialized_);
292
293 // Do not suggest anything if there's no suggestion data for the focused
294 // field.
295 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
296 return;
297
298 // Non-owned regions that match a field value by region key.
299 std::set<const RegionData*> regions_match_key;
300
301 // Non-owned regions that match a field value by region name.
302 std::set<const RegionData*> regions_match_name;
303
304 AddressSuggestions address_suggestions;
305 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
306 ++i) {
307 AddressField address_field = static_cast<AddressField>(i);
308 AddressField parent_address_field = static_cast<AddressField>(i - 1);
309
310 const std::string& field_value = user_input.GetFieldValue(address_field);
311 const std::string& parent_field_value =
312 user_input.GetFieldValue(parent_address_field);
313
314 if (field_value.empty() &&
315 (address_field == ADMIN_AREA || parent_field_value.empty())) {
316 address_suggestions.AllRegionsMatchForField(address_field);
317 continue;
318 }
319
320 if (field_value.empty()) {
321 DCHECK_NE(address_field, focused_field);
322 continue;
323 }
324
325 regions_match_key.clear();
326 regions_match_name.clear();
327
328 const FieldTries& field_tries = field_tries_[address_field];
329
330 int32_t size = -1;
331 const uint8_t* value = canonicalizer_->Canonicalize(field_value, &size);
332 field_tries.keys.FindDataForKeyPrefix(value, size, &regions_match_key);
333 field_tries.names.FindDataForKeyPrefix(value, size, &regions_match_name);
334
335 bool added_suggestions = address_suggestions.AddRegions(
336 address_field, regions_match_key, regions_match_name);
337
338 // Do not suggest anything if the focused field does not have suggestions.
339 if (address_field == focused_field && !added_suggestions)
340 return;
341 }
342
343 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
344 }
345
346 private:
347 // The tries to lookup regions for a specific field by keys and names. For
348 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
349 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
350 // struct is uncopyable due to Trie objects being uncopyable.
351 struct FieldTries {
352 Trie<const RegionData*> keys;
353 Trie<const RegionData*> names;
354 };
355
356 // Adds the sub-regions of |parent_region| into tries.
357 void AddSubRegionsOf(const RegionData& parent_region,
358 AddressField parent_field) {
359 DCHECK(!parent_region.sub_regions().empty());
360
361 AddressField address_field = static_cast<AddressField>(parent_field + 1);
362 DCHECK(address_field >= ADMIN_AREA);
363 DCHECK(address_field <= DEPENDENT_LOCALITY);
364
365 FieldTries& field_tries = field_tries_[address_field];
366 for (std::vector<const RegionData*>::const_iterator it =
367 parent_region.sub_regions().begin();
368 it != parent_region.sub_regions().end();
369 ++it) {
370 const RegionData* region = *it;
371 DCHECK(region);
372 DCHECK(!region->key().empty());
373 DCHECK(!region->name().empty());
374
375 int32_t key_size = -1;
376 const uint8_t* key_value =
377 canonicalizer_->Canonicalize(region->key(), &key_size);
378 field_tries.keys.AddDataForKey(key_value, key_size, region);
379
380 int32_t name_size = -1;
381 const uint8_t* name_value =
382 canonicalizer_->Canonicalize(region->name(), &name_size);
383 field_tries.names.AddDataForKey(name_value, name_size, region);
384
385 if (smallest_region_size_ < address_field)
386 smallest_region_size_ = address_field;
387
388 if (!region->sub_regions().empty())
389 AddSubRegionsOf(*region, address_field);
390 }
391 }
392
393 // True after Initialize() has been called.
394 bool initialized_;
395
396 // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
397 // DEPENDENT_LOCALITY.
398 std::map<AddressField, FieldTries> field_tries_;
399
400 // The smallest size of a sub-region that has data. For example, this is
401 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
402 AddressField smallest_region_size_;
403
404 // A shared instance of string canonicalizer for case and diacritic comparison
405 // of region keys and names.
406 const StringCanonicalizer* canonicalizer_;
407 };
408
409 InputSuggester::InputSuggester(PreloadSupplier* supplier)
410 : region_data_builder_(supplier),
411 input_helper_(supplier),
412 validator_(supplier),
413 validated_(BuildCallback(this, &InputSuggester::Validated)) {}
414
415 InputSuggester::~InputSuggester() {}
416
417 void InputSuggester::GetSuggestions(const AddressData& user_input,
418 AddressField focused_field,
419 size_t suggestions_limit,
420 std::vector<AddressData>* suggestions) {
421 DCHECK(suggestions);
422 DCHECK(focused_field == POSTAL_CODE ||
423 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
424
425 AddressData address_copy = user_input;
426
427 // Do not suggest anything if the user input is empty.
428 if (address_copy.IsFieldEmpty(focused_field))
429 return;
430
431 if (focused_field == POSTAL_CODE) {
432 // Do not suggest anything if the user is typing an invalid postal code.
433 FieldProblemMap problems;
434 FieldProblemMap filter;
435 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
436 validator_.Validate(address_copy,
437 true, // Allow postal office boxes.
438 false, // Do not require recipient name.
439 &filter,
440 &problems,
441 *validated_);
442 if (!problems.empty())
443 return;
444
445 // Fill in the sub-regions based on the postal code.
446 input_helper_.FillAddress(&address_copy);
447 }
448
449 // Lazily initialize the mapping from COUNTRY level regions to all of their
450 // sub-regions with metadata for generating suggestions.
451 std::string unused_best_language;
452 const RegionData& region_data =
453 region_data_builder_.Build(address_copy.region_code,
454 address_copy.language_code,
455 &unused_best_language);
456 SubRegionData& sub_region_data = sub_regions_[&region_data];
Evan Stade 2014/06/27 22:44:38 I don't think we use non-const references
please use gerrit instead 2014/06/28 22:05:21 Changed to pointer.
457 if (!sub_region_data.is_initialized())
458 sub_region_data.Initialize(region_data, canonicalizer_);
459
460 // Build the list of regions that match |address_copy| when the user is typing
461 // in the |focused_field|.
462 std::vector<Suggestion> suggested_regions;
463 sub_region_data.BuildSuggestions(
464 address_copy, focused_field, &suggested_regions);
465
466 FieldProblemMap problems;
467 FieldProblemMap filter;
468 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
469
470 // Generate suggestions based on the regions.
471 for (std::vector<Suggestion>::const_iterator suggested_region_it =
472 suggested_regions.begin();
473 suggested_region_it != suggested_regions.end();
474 ++suggested_region_it) {
475 AddressData address;
476 address.region_code = address_copy.region_code;
477 address.postal_code = address_copy.postal_code;
478
479 // Traverse the tree of regions from the smallest |region_to_suggest| to the
480 // country-wide "root" of the tree. Use the region names or keys found at
481 // each of the levels of the tree to build the |address| to suggest.
482 AddressField address_field = suggested_region_it->matching_address_field;
483 for (const RegionData* region = suggested_region_it->region_to_suggest;
484 region->has_parent();
485 region = &region->parent()) {
486 address.SetFieldValue(address_field,
487 suggested_region_it->region_key_matches
488 ? region->key()
489 : region->name());
490 address_field = static_cast<AddressField>(address_field - 1);
491 }
492
493 // Do not suggest an address with a mismatching postal code.
494 problems.clear();
495 validator_.Validate(address,
496 true, // Allow postal office boxes.
497 false, // Do not require recipient name.
498 &filter,
499 &problems,
500 *validated_);
501 if (!problems.empty())
502 continue;
503
504 // Do not add more suggestions than |suggestions_limit|.
505 if (suggestions->size() >= suggestions_limit) {
506 suggestions->clear();
507 return;
508 }
509
510 suggestions->push_back(address);
511 }
512 }
513
514 void InputSuggester::Validated(bool success,
515 const AddressData&,
516 const FieldProblemMap&) {
517 DCHECK(success);
518 }
519
520 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698