Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(503)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Make StringCanonicalizer not a scoped_ptr. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/logging.h"
12 #include "base/stl_util.h"
13 #include "third_party/libaddressinput/chromium/trie.h"
14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
17 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
18
19 namespace autofill {
20
21 using ::i18n::addressinput::AddressData;
22 using ::i18n::addressinput::AddressField;
23 using ::i18n::addressinput::BuildCallback;
24 using ::i18n::addressinput::FieldProblemMap;
25 using ::i18n::addressinput::PreloadSupplier;
26 using ::i18n::addressinput::RegionData;
27 using ::i18n::addressinput::RegionDataBuilder;
28
29 using ::i18n::addressinput::ADMIN_AREA;
30 using ::i18n::addressinput::COUNTRY;
31 using ::i18n::addressinput::DEPENDENT_LOCALITY;
32 using ::i18n::addressinput::LOCALITY;
33 using ::i18n::addressinput::POSTAL_CODE;
34
35 using ::i18n::addressinput::INVALID_FORMAT;
36 using ::i18n::addressinput::MISMATCHING_VALUE;
37
38 namespace {
39
40 // A region and its metadata useful for constructing a suggestion.
41 struct Suggestion {
42 public:
43 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
44 // |region_to_suggest|, which should not be NULL.
45 Suggestion(const RegionData* region_to_suggest,
46 AddressField matching_address_field,
47 bool region_key_matches)
48 : region_to_suggest(region_to_suggest),
49 matching_address_field(matching_address_field),
50 region_key_matches(region_key_matches) {
51 DCHECK(region_to_suggest);
52 }
53
54 ~Suggestion() {}
55
56 // The region that should be suggested. For example, if the region is ("CA",
57 // "California"), then either "CA" or "California" should be suggested.
58 const RegionData* region_to_suggest;
59
60 // The field in the address for which the suggestion should be made. For
61 // example, ADMIN_AREA in US means the suggestion should be made for the field
62 // labeled "State".
63 AddressField matching_address_field;
64
65 // True if the key of the region matches user input (the name may or may not
66 // match). "CA" should be suggested for a ("CA", "California") region.
67 //
68 // False if only the name of the region matches user input (key does not
69 // match). "California" should be suggested fir a ("CA", "California") region.
70 bool region_key_matches;
71 };
72
73 // Suggestions for an address. Contains lists of suggestions for administrative
74 // area, locality, and dependent locality fields of an address.
75 class AddressSuggestions {
76 public:
77 AddressSuggestions() {
78 for (int i = ADMIN_AREA; i <= LOCALITY; ++i) {
79 regions_match_input_[static_cast<AddressField>(i)] =
80 new RegionsMatchInput;
81 }
82 }
83
84 ~AddressSuggestions() { STLDeleteValues(&regions_match_input_); }
85
86 // Marks all regions at |address_field| level as matching user input.
87 void AllRegionsMatchForField(AddressField address_field) {
88 all_regions_match_input_.insert(address_field);
89 }
90
91 // Marks given regions at |address_field| level as matching user input. The
92 // |regions_match_key| parameter contains the regions that match user input by
93 // their keys. The |regions_match_name| parameter contains the regions that
94 // match user input by their names.
95 //
96 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
97 // DEPENDENT_LOCALITY.
98 bool AddRegions(AddressField address_field,
99 const std::set<const RegionData*>& regions_match_key,
100 const std::set<const RegionData*>& regions_match_name) {
101 DCHECK(address_field >= ADMIN_AREA);
102 DCHECK(address_field <= DEPENDENT_LOCALITY);
103
104 AddressField parent_address_field =
105 static_cast<AddressField>(address_field - 1);
106
107 bool all_parents_match =
108 parent_address_field == COUNTRY ||
109 all_regions_match_input_.find(parent_address_field) !=
110 all_regions_match_input_.end();
111
112 // Cannot build |address_field| level suggestions if there are no matches in
113 // |parent_address_field| level regions.
114 const RegionsMatchInput* parents = NULL;
115 if (address_field > ADMIN_AREA && !all_parents_match) {
116 parents = regions_match_input_[parent_address_field];
117 if (parents->keys.empty() && parents->names.empty())
118 return false;
119 }
120
121 RegionsMatchInput* regions = NULL;
122 if (address_field < DEPENDENT_LOCALITY)
123 regions = regions_match_input_[address_field];
124
125 std::vector<Suggestion>& suggestions = suggestions_[address_field];
126 bool added_suggestions = false;
127
128 // Iterate over both |regions_match_key| and |regions_match_name| and build
129 // Suggestion objects based on the given RegionData objects. Advance either
130 // one iterator at a time (if they point to different data) or both
131 // iterators at once (if they point to the same data).
132 for (std::set<const RegionData*>::const_iterator
133 key_it = regions_match_key.begin(),
134 name_it = regions_match_name.begin();
135 key_it != regions_match_key.end() ||
136 name_it != regions_match_name.end();) {
137 const RegionData* key_region =
138 key_it != regions_match_key.end() ? *key_it : NULL;
139 const RegionData* name_region =
140 name_it != regions_match_name.end() ? *name_it : NULL;
141
142 // Regions that do not have a parent that also matches input will not
143 // become a suggestion.
144 bool key_region_has_parent =
145 all_parents_match ||
146 (parents && !parents->keys.empty() && key_region &&
147 parents->keys.find(&key_region->parent()) != parents->keys.end());
148 bool name_region_has_parent =
149 all_parents_match ||
150 (parents && !parents->names.empty() && name_region &&
151 parents->names.find(&name_region->parent()) != parents->names.end());
152
153 if (name_region && (!key_region || name_region < key_region)) {
154 if (name_region_has_parent) {
155 suggestions.push_back(Suggestion(name_region, address_field, false));
156 added_suggestions = true;
157 if (regions)
158 regions->names.insert(name_region);
159 }
160
161 ++name_it;
162 } else if (key_region && (!name_region || key_region < name_region)) {
163 if (key_region_has_parent) {
164 suggestions.push_back(Suggestion(key_region, address_field, true));
165 added_suggestions = true;
166 if (regions)
167 regions->keys.insert(key_region);
168 }
169
170 ++key_it;
171 } else {
172 if (key_region_has_parent) {
173 suggestions.push_back(Suggestion(key_region, address_field, true));
174 added_suggestions = true;
175 if (regions) {
176 regions->keys.insert(key_region);
177 regions->names.insert(name_region);
178 }
179 }
180
181 ++key_it;
182 ++name_it;
183 }
184 }
185
186 return added_suggestions;
187 }
188
189 // Swaps the suggestions for the smallest sub-region into |suggestions|.
190 // |this| is not usable after this call due to using the swap() operation.
191 //
192 // The |suggestions| parameter should not be NULL.
193 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
194 DCHECK(suggestions);
195 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
196 std::vector<Suggestion>& result =
197 suggestions_[static_cast<AddressField>(i)];
198 if (!result.empty()) {
199 suggestions->swap(result);
200 return;
201 }
202 }
203 }
204
205 private:
206 // The sets of non-owned regions used for looking up regions that match user
207 // input by keys and names.
208 struct RegionsMatchInput {
209 std::set<const RegionData*> keys;
210 std::set<const RegionData*> names;
211 };
212
213 // The owned sets of regions that match user input at ADMIN_AREA and LOCALITY
214 // levels.
215 std::map<AddressField, RegionsMatchInput*> regions_match_input_;
Evan Stade 2014/06/27 16:42:33 why not std::map<AddressField, RegionsMatchInput>?
please use gerrit instead 2014/06/27 19:34:38 Done.
216
217 // The set of fields for which all regions match user input. Used to avoid
218 // storing a long list in |regions_match_input_| and later looking it up
219 // there.
220 std::set<AddressField> all_regions_match_input_;
221
222 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
223 std::map<AddressField, std::vector<Suggestion> > suggestions_;
224
225 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
226 };
227
228 } // namespace
229
230 InputSuggester::StringCanonicalizer::StringCanonicalizer()
231 : buffer_size_(kInitialBufferSize), buffer_(new uint8_t[buffer_size_]) {
232 UErrorCode error_code = U_ZERO_ERROR;
233 collator_.reset(
234 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
235 DCHECK(U_SUCCESS(error_code));
236 collator_->setStrength(icu::Collator::PRIMARY);
237 }
238
239 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
240
241 const uint8_t* InputSuggester::StringCanonicalizer::Canonicalize(
242 const std::string& original,
243 int32_t* result_size) const {
244 DCHECK(!original.empty());
245 DCHECK(result_size);
246
247 icu::UnicodeString icu_str(original.c_str(), original.length());
248 int32_t sort_key_size =
249 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
250 DCHECK_LT(0, sort_key_size);
251
252 if (sort_key_size > buffer_size_) {
253 buffer_size_ = sort_key_size * 2;
254 buffer_.reset(new uint8_t[buffer_size_]);
255 sort_key_size = collator_->getSortKey(icu_str, buffer_.get(), buffer_size_);
256 DCHECK_LT(0, sort_key_size);
257 DCHECK_GT(buffer_size_, sort_key_size);
258 }
259
260 *result_size = sort_key_size - 1;
261 return buffer_.get();
262 }
263
264 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
265 // region name or key.
266 class InputSuggester::SubRegionData {
267 public:
268 // Adds the sub-regions of |country_region| into tries. Uses
269 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
270 // sub-regions.
271 SubRegionData(const RegionData& country_region,
272 const StringCanonicalizer& shared_canonicalizer)
273 : canonicalizer_(shared_canonicalizer), smallest_region_size_(COUNTRY) {
274 DCHECK(!country_region.has_parent());
275
276 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i)
277 field_tries_[static_cast<AddressField>(i)] = new FieldTries;
278
279 if (!country_region.sub_regions().empty())
280 AddSubRegionsOf(country_region, COUNTRY);
281 }
282
283 ~SubRegionData() { STLDeleteValues(&field_tries_); }
284
285 // Adds the suggestions for |user_input| into |suggestions| when user is
286 // typing in |focused_field|.
287 void BuildSuggestions(const AddressData& user_input,
288 AddressField focused_field,
289 std::vector<Suggestion>* suggestions) const {
290 // Do not suggest anything if there's no suggestion data for the focused
291 // field.
292 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
293 return;
294
295 // Non-owned regions that match a field value by region key.
296 std::set<const RegionData*> regions_match_key;
297
298 // Non-owned regions that match a field value by region name.
299 std::set<const RegionData*> regions_match_name;
300
301 AddressSuggestions address_suggestions;
302 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
303 ++i) {
304 AddressField address_field = static_cast<AddressField>(i);
305 AddressField parent_address_field = static_cast<AddressField>(i - 1);
306
307 const std::string& field_value = user_input.GetFieldValue(address_field);
308 const std::string& parent_field_value =
309 user_input.GetFieldValue(parent_address_field);
310
311 if (field_value.empty() &&
312 (address_field == ADMIN_AREA || parent_field_value.empty())) {
313 address_suggestions.AllRegionsMatchForField(address_field);
314 continue;
315 }
316
317 if (field_value.empty()) {
318 DCHECK_NE(address_field, focused_field);
319 continue;
320 }
321
322 regions_match_key.clear();
323 regions_match_name.clear();
324
325 const FieldTries* field_tries = field_tries_.find(address_field)->second;
Evan Stade 2014/06/27 16:42:33 why not field_tries_[address_field]?
please use gerrit instead 2014/06/27 19:34:38 Done.
326
327 int32_t size = -1;
328 const uint8_t* value = canonicalizer_.Canonicalize(field_value, &size);
329 field_tries->keys.FindDataForKeyPrefix(value, size, &regions_match_key);
330 field_tries->names.FindDataForKeyPrefix(value, size, &regions_match_name);
331
332 bool added_suggestions = address_suggestions.AddRegions(
333 address_field, regions_match_key, regions_match_name);
334
335 // Do not suggest anything if the focused field does not have suggestions.
336 if (address_field == focused_field && !added_suggestions)
337 return;
338 }
339
340 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
341 }
342
343 private:
344 // The tries to lookup regions for a specific field by keys and names. For
345 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL",
346 // "AK",
Evan Stade 2014/06/27 16:42:33 weird line breaking here
please use gerrit instead 2014/06/27 19:34:38 Fixed.
347 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
348 // struct is uncopyable due to Trie objects being uncopyable.
349 struct FieldTries {
350 Trie<const RegionData*> keys;
351 Trie<const RegionData*> names;
352 };
353
354 // Adds the sub-regions of |parent_region| into tries.
355 void AddSubRegionsOf(const RegionData& parent_region,
356 AddressField parent_field) {
357 DCHECK(!parent_region.sub_regions().empty());
358
359 AddressField address_field = static_cast<AddressField>(parent_field + 1);
360 DCHECK(address_field >= ADMIN_AREA);
361 DCHECK(address_field <= DEPENDENT_LOCALITY);
362
363 FieldTries* field_tries = field_tries_[address_field];
364 for (std::vector<const RegionData*>::const_iterator it =
365 parent_region.sub_regions().begin();
366 it != parent_region.sub_regions().end();
367 ++it) {
368 const RegionData* region = *it;
369 DCHECK(region);
370 DCHECK(!region->key().empty());
371 DCHECK(!region->name().empty());
372
373 int32_t key_size = -1;
374 const uint8_t* key_value =
375 canonicalizer_.Canonicalize(region->key(), &key_size);
376 field_tries->keys.AddDataForKey(key_value, key_size, region);
377
378 int32_t name_size = -1;
379 const uint8_t* name_value =
380 canonicalizer_.Canonicalize(region->name(), &name_size);
381 field_tries->names.AddDataForKey(name_value, name_size, region);
382
383 if (smallest_region_size_ < address_field)
384 smallest_region_size_ = address_field;
385
386 if (!region->sub_regions().empty())
387 AddSubRegionsOf(*region, address_field);
388 }
389 }
390
391 // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and
392 // DEPENDENT_LOCALITY.
393 std::map<AddressField, FieldTries*> field_tries_;
Evan Stade 2014/06/27 16:42:33 why not std::map<AddressField, FieldTries>?
please use gerrit instead 2014/06/27 19:34:38 Done.
394
395 // The smallest size of a sub-region that has data. For example, this is
396 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
397 AddressField smallest_region_size_;
398
399 // A shared instance of string canonicalizer for case and diacritic comparison
400 // of region keys and names.
401 const StringCanonicalizer& canonicalizer_;
402
403 DISALLOW_COPY_AND_ASSIGN(SubRegionData);
404 };
405
406 InputSuggester::InputSuggester(PreloadSupplier* supplier)
407 : region_data_builder_(supplier),
408 input_helper_(supplier),
409 validator_(supplier),
410 validated_(BuildCallback(this, &InputSuggester::Validated)) {}
411
412 InputSuggester::~InputSuggester() {
413 STLDeleteValues(&sub_regions_);
414 }
415
416 void InputSuggester::GetSuggestions(const AddressData& user_input,
417 AddressField focused_field,
418 size_t suggestions_limit,
419 std::vector<AddressData>* suggestions) {
420 DCHECK(suggestions);
421 DCHECK(focused_field == POSTAL_CODE ||
422 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
423
424 AddressData address_copy = user_input;
425
426 // Do not suggest anything if the user input is empty.
427 if (address_copy.IsFieldEmpty(focused_field))
428 return;
429
430 if (focused_field == POSTAL_CODE) {
431 // Do not suggest anything if the user is typing an invalid postal code.
432 FieldProblemMap problems;
433 FieldProblemMap filter;
434 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
435 validator_.Validate(address_copy,
436 true, // Allow postal office boxes.
437 false, // Do not require recipient name.
438 &filter,
439 &problems,
440 *validated_);
441 if (!problems.empty())
442 return;
443
444 // Fill in the sub-regions based on the postal code.
445 input_helper_.FillAddress(&address_copy);
446 }
447
448 // Lazily initialize the mapping from COUNTRY level regions to all of their
449 // sub-regions with metadata for generating suggestions.
450 std::string unused_best_language;
451 const RegionData& region_data =
452 region_data_builder_.Build(address_copy.region_code,
453 address_copy.language_code,
454 &unused_best_language);
455 std::map<const RegionData*, const SubRegionData*>::iterator
456 sub_region_data_it = sub_regions_.find(&region_data);
Evan Stade 2014/06/27 16:42:33 if you move the SubRegionData constructor params t
please use gerrit instead 2014/06/27 19:34:38 Done: if (!sub_region_data.is_initialized()) su
457 if (sub_region_data_it == sub_regions_.end()) {
458 sub_region_data_it =
459 sub_regions_.insert(std::make_pair(
460 &region_data,
461 new SubRegionData(region_data, canonicalizer_)))
462 .first;
463 }
464 DCHECK(sub_region_data_it->second);
465
466 // Build the list of regions that match |address_copy| when the user is typing
467 // in the |focused_field|.
468 std::vector<Suggestion> suggested_regions;
469 sub_region_data_it->second->BuildSuggestions(
470 address_copy, focused_field, &suggested_regions);
471
472 FieldProblemMap problems;
473 FieldProblemMap filter;
474 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
475
476 // Generate suggestions based on the regions.
477 for (std::vector<Suggestion>::const_iterator suggested_region_it =
478 suggested_regions.begin();
479 suggested_region_it != suggested_regions.end();
480 ++suggested_region_it) {
481 AddressData address;
482 address.region_code = address_copy.region_code;
483 address.postal_code = address_copy.postal_code;
484
485 // Traverse the tree of regions from the smallest |region_to_suggest| to the
486 // country-wide "root" of the tree. Use the region names or keys found at
487 // each of the levels of the tree to build the |address| to suggest.
488 AddressField address_field = suggested_region_it->matching_address_field;
489 for (const RegionData* region = suggested_region_it->region_to_suggest;
490 region->has_parent();
491 region = &region->parent()) {
492 address.SetFieldValue(address_field,
493 suggested_region_it->region_key_matches
494 ? region->key()
495 : region->name());
496 address_field = static_cast<AddressField>(address_field - 1);
497 }
498
499 // Do not suggest an address with a mismatching postal code.
500 problems.clear();
501 validator_.Validate(address,
502 true, // Allow postal office boxes.
503 false, // Do not require recipient name.
504 &filter,
505 &problems,
506 *validated_);
507 if (!problems.empty())
508 continue;
509
510 // Do not add more suggestions than |suggestions_limit|.
511 if (suggestions->size() >= suggestions_limit) {
512 suggestions->clear();
513 return;
514 }
515
516 suggestions->push_back(address);
517 }
518 }
519
520 void InputSuggester::Validated(bool success,
521 const AddressData&,
522 const FieldProblemMap&) {
523 DCHECK(success);
524 }
525
526 } // namespace autofill
OLDNEW
« no previous file with comments | « third_party/libaddressinput/chromium/input_suggester.h ('k') | third_party/libaddressinput/chromium/json.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698