Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Work in progress. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/scoped_vector.h"
15 #include "base/stl_util.h"
16 #include "third_party/icu/source/i18n/unicode/coll.h"
17 #include "third_party/libaddressinput/chromium/trie.h"
18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data_builder.h"
21
22 namespace autofill {
23
24 using ::i18n::addressinput::AddressData;
25 using ::i18n::addressinput::AddressField;
26 using ::i18n::addressinput::PreloadSupplier;
27 using ::i18n::addressinput::RegionData;
28 using ::i18n::addressinput::RegionDataBuilder;
29
30 using ::i18n::addressinput::ADMIN_AREA;
31 using ::i18n::addressinput::COUNTRY;
32 using ::i18n::addressinput::DEPENDENT_LOCALITY;
33 using ::i18n::addressinput::LOCALITY;
34 using ::i18n::addressinput::POSTAL_CODE;
35
36 namespace {
37
38 // Canonicalizes strings for fast case and diacritic insensitive comparison.
39 class StringCanonicalizer {
40 public:
41 StringCanonicalizer() {
42 UErrorCode error_code = U_ZERO_ERROR;
43 collator_.reset(
44 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
45 DCHECK(U_SUCCESS(error_code));
46 collator_->setStrength(icu::Collator::PRIMARY);
47 }
48
49 ~StringCanonicalizer() {}
50
51 // Returns a canonical version of the string that can be used for comparing
52 // strings regardless of diacritics and capitalization.
53 // Canonicalize("Texas") == Canonicalize("T\u00E9xas");
54 // Canonicalize("Texas") == Canonicalize("teXas");
55 // Canonicalize("Texas") != Canonicalize("California");
56 //
57 // The output is not human-readable.
58 // Canonicalize("Texas") != "Texas";
59 std::string Canonicalize(const std::string& original) const {
60 icu::UnicodeString icu_str(original.c_str(),
61 static_cast<int32_t>(original.length()));
62 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
63 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
64 DCHECK(buffer.get());
65 int32_t filled_size =
66 collator_->getSortKey(icu_str, buffer.get(), buffer_size);
67 DCHECK_EQ(buffer_size, filled_size);
68 return std::string(reinterpret_cast<const char*>(buffer.get()));
69 }
70
71 private:
72 scoped_ptr<icu::Collator> collator_;
73
74 DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer);
75 };
76
77 // A region and its metadata useful for constructing a suggestion.
78 class Suggestion {
79 public:
80 // Does not take ownership of |region_to_suggest|, which cannot be NULL. At
81 // least one of |region_key_matches| and |region_name_names| must be true.
82 Suggestion(const RegionData* region_to_suggest,
83 AddressField matching_address_field,
84 bool region_key_matches,
85 bool region_name_matches)
86 : region_to_suggest_(region_to_suggest),
87 matching_address_field_(matching_address_field),
88 region_key_matches_(region_key_matches),
89 region_name_matches_(region_name_matches) {
90 DCHECK(region_to_suggest);
91 DCHECK(region_key_matches || region_name_matches);
92 }
93
94 ~Suggestion() {}
95
96 const RegionData& region_to_suggest() const { return *region_to_suggest_; }
97
98 AddressField matching_address_field() const {
99 return matching_address_field_;
100 }
101
102 bool region_key_matches() const { return region_key_matches_; }
103
104 bool region_name_matches() const { return region_name_matches_; }
105
106 private:
107 const RegionData* const region_to_suggest_;
108 const AddressField matching_address_field_;
109 bool region_key_matches_;
110 bool region_name_matches_;
111
112 DISALLOW_COPY_AND_ASSIGN(Suggestion);
113 };
114
115 // Collects suggestions for a country, one address field level at a time.
116 class CountrySuggestions {
117 public:
118 CountrySuggestions()
119 : all_administrative_areas_match_(false), all_localities_match_(false) {}
120
121 ~CountrySuggestions() {}
122
123 // Should be called when user input for |field| is empty. The |field|
124 // parameter should be one of ADMIN_AREA, LOCALITY, or DEPENDENT_LOCALITY.
125 // Don't call this method with LOCALITY unless it was already called with
126 // ADMIN_AREA: if only a subset of ADMIN_AREA regions match, then all LOCALITY
127 // regions cannot possibly match.
128 void AllRegionsMatchForField(AddressField field) {
129 switch (field) {
130 case ADMIN_AREA:
131 all_administrative_areas_match_ = true;
132 return;
133 case LOCALITY:
134 DCHECK(all_administrative_areas_match_);
135 all_localities_match_ = true;
136 return;
137 case DEPENDENT_LOCALITY:
138 // No-op: nothing is recorded for the deepest level.
139 return;
140 default:
141 break;
142 }
143 NOTREACHED();
144 }
145
146 // The |address_field| parameter should be ADMIN_AREA, LOCALITY, or
147 // DEPENDENT_LOCALITY. If |address_field| is LOCALITY or DEPENDENT_LOCALITY,
148 // then the method assumes that either this method or
149 // AllRegionsMatchForField() were previously invoked with |address_field-1|.
150 //
151 // Returns true if added suggestions for the regions.
152 bool AddRegions(AddressField address_field,
153 const std::set<const RegionData*>& regions_match_key,
154 const std::set<const RegionData*>& regions_match_name) {
155 AddressField parent_address_field =
156 static_cast<AddressField>(address_field - 1);
157
158 // Cannot build locality suggestions if none of the administrative areas
159 // match.
160 if (address_field == LOCALITY && !all_administrative_areas_match_ &&
161 administrative_area_key_regions_.empty() &&
162 administrative_area_name_regions_.empty()) {
163 return false;
164 }
165
166 // Cannot build dependent locality suggestions if none of the localities
167 // match.
168 if (address_field == DEPENDENT_LOCALITY && !all_localities_match_ &&
169 locality_key_regions_.empty() && locality_name_regions_.empty()) {
170 return false;
171 }
172
// True when every region at the parent level was declared a match, in which
// case the per-region parent lookups below are skipped.
173 bool all_parents_match =
174 (parent_address_field == ADMIN_AREA &&
175 all_administrative_areas_match_) ||
176 (parent_address_field == LOCALITY && all_localities_match_);
177
// Pick the output vector for this field level.
178 ScopedVector<Suggestion>* suggestions = NULL;
179 switch (address_field) {
180 case ADMIN_AREA:
181 suggestions = &administrative_area_;
182 break;
183 case LOCALITY:
184 suggestions = &locality_;
185 break;
186 case DEPENDENT_LOCALITY:
187 suggestions = &dependent_locality_;
188 break;
189 default:
190 NOTREACHED();
191 break;
192 }
193
// |*_parents| are the sets recorded for the level above (NULL for ADMIN_AREA).
// |*_regions| are the sets to record into for this level (NULL for
// DEPENDENT_LOCALITY).
194 std::set<const RegionData*>* key_parents = NULL;
195 std::set<const RegionData*>* name_parents = NULL;
196 std::set<const RegionData*>* key_regions = NULL;
197 std::set<const RegionData*>* name_regions = NULL;
198 switch (address_field) {
199 case ADMIN_AREA:
200 key_parents = NULL;
201 name_parents = NULL;
202 key_regions = &administrative_area_key_regions_;
203 name_regions = &administrative_area_name_regions_;
204 break;
205 case LOCALITY:
206 key_parents = &administrative_area_key_regions_;
207 name_parents = &administrative_area_name_regions_;
208 key_regions = &locality_key_regions_;
209 name_regions = &locality_name_regions_;
210 break;
211 case DEPENDENT_LOCALITY:
212 key_parents = &locality_key_regions_;
213 name_parents = &locality_name_regions_;
214 key_regions = NULL;
215 name_regions = NULL;
216 break;
217 default:
218 NOTREACHED();
219 break;
220 }
221
222 bool added_suggestions = false;
223 for (std::set<const RegionData*>::const_iterator
224 key_it = regions_match_key.begin(),
225 name_it = regions_match_name.begin();
226 key_it != regions_match_key.end() ||
227 name_it != regions_match_name.end();) {
228 const RegionData* key_region =
229 key_it != regions_match_key.end() ? *key_it : NULL;
230
231 const RegionData* name_region =
232 name_it != regions_match_name.end() ? *name_it : NULL;
233
// Look up each candidate's parent among the parents that matched the same way
// (by key or by name). A NULL result means that parent did not match.
234 const RegionData* key_parent = NULL;
235 const RegionData* name_parent = NULL;
236 if (key_parents && !all_parents_match) {
237 if (key_region) {
238 DCHECK(key_region->has_parent());
239
240 std::set<const RegionData*>::const_iterator key_parent_it =
241 key_parents->find(&key_region->parent());
242
243 if (key_parent_it != key_parents->end())
244 key_parent = *key_parent_it;
245 }
246
247 if (name_region) {
248 DCHECK(name_region->has_parent());
249
250 std::set<const RegionData*>::const_iterator name_parent_it =
251 name_parents->find(&name_region->parent());
252
253 if (name_parent_it != name_parents->end())
254 name_parent = *name_parent_it;
255 }
256 }
257
258 // Both sets iterate in ascending pointer order; merge them like two sorted lists.
Evan Stade 2014/06/20 22:32:27 comment doesn't make sense (to me at least)
please use gerrit instead 2014/06/24 21:23:42 Improved the comments and reduced the number of li
259 if (name_region && (!key_region || name_region < key_region)) {
260 if (!name_parents || name_parent || all_parents_match) {
261 added_suggestions = true;
262 suggestions->push_back(
263 new Suggestion(name_region, address_field, false, true));
264
265 if (name_regions)
266 name_regions->insert(name_region);
267 }
268
269 ++name_it;
270 } else if (key_region && (!name_region || key_region < name_region)) {
271 if (!key_parents || key_parent || all_parents_match) {
272 added_suggestions = true;
273 suggestions->push_back(
274 new Suggestion(key_region, address_field, true, false));
275
276 if (key_regions)
277 key_regions->insert(key_region);
278 }
279
280 ++key_it;
281 } else if (key_region == name_region) {
// The same region matched by both key and name: emit one suggestion for it.
// NOTE(review): only |key_parent| is consulted here, so a region whose parent
// matched by name but not by key is skipped. Confirm that this is intended.
282 if (!key_parents || key_parent || all_parents_match) {
283 added_suggestions = true;
284 suggestions->push_back(
285 new Suggestion(key_region, address_field, true, true));
286
287 if (key_regions) {
288 key_regions->insert(key_region);
289 name_regions->insert(name_region);
290 }
291 }
292
293 ++key_it;
294 ++name_it;
295 }
296 }
297
298 return added_suggestions;
299 }
300
301 // Swaps the result into |suggestions|. This object is not usable after
302 // this call due to using the swap() operation.
303 //
304 // Assumes that the number of sub-regions at each level is larger than the
305 // suggestion limit. Therefore, does not return anything when all regions
306 // match for a field.
307 void GetSuggestions(ScopedVector<Suggestion>* suggestions) {
308 DCHECK(suggestions);
309
// Hand over the deepest level that has any suggestions.
310 if (!dependent_locality_.empty())
311 suggestions->swap(dependent_locality_);
312 else if (!locality_.empty())
313 suggestions->swap(locality_);
314 else
315 suggestions->swap(administrative_area_);
316 }
317
318 private:
319 // Sets of non-owned regions for fast lookup of parent regions.
320 std::set<const RegionData*> administrative_area_key_regions_;
321 std::set<const RegionData*> administrative_area_name_regions_;
322 std::set<const RegionData*> locality_key_regions_;
323 std::set<const RegionData*> locality_name_regions_;
324
// Set by AllRegionsMatchForField().
325 bool all_administrative_areas_match_;
326 bool all_localities_match_;
327
// Suggestions collected by AddRegions(), one vector per field level.
328 ScopedVector<Suggestion> administrative_area_;
329 ScopedVector<Suggestion> locality_;
330 ScopedVector<Suggestion> dependent_locality_;
331
332 DISALLOW_COPY_AND_ASSIGN(CountrySuggestions);
333 };
334
335 // Region data for a field.
336 class FieldData {
337 public:
338 explicit FieldData(AddressField field) : field_(field) {
339 DCHECK(field >= ADMIN_AREA);
340 DCHECK(field <= DEPENDENT_LOCALITY);
341 }
342
343 ~FieldData() {}
344
345 // Returns the field for all of the regions.
346 AddressField field() const { return field_; }
347
348 // Returns true if no data was added in AddRegionData().
349 bool empty() const { return keys_.empty() && names_.empty(); }
350
351 // Adds |region_data| with specified |key| and |name|. Does not use the key
352 // and name from |region_data| to allow for string canonicalization. Does not
353 // take ownership of |region_data|.
354 void AddRegionData(const std::string& key,
355 const std::string& name,
356 const RegionData* region_data) {
357 DCHECK(region_data);
358 if (!key.empty())
359 keys_.AddDataForKey(key, region_data);
360 if (!name.empty())
361 names_.AddDataForKey(name, region_data);
362 }
363
364 // Looks up |search_term| in the regions that were previously added via
365 // AddRegionData(). Saves the regions that match the |search_term| by key into
366 // |regions_match_key| and by name into |regions_match_name|, which should not
367 // be NULL. The caller does not own the RegionData objects.
368 void FindRegionData(const std::string& search_term,
369 std::set<const RegionData*>* regions_match_key,
370 std::set<const RegionData*>* regions_match_name) const {
371 DCHECK(regions_match_key);
372 DCHECK(regions_match_name);
373 keys_.FindDataForKeyPrefix(search_term, regions_match_key);
374 names_.FindDataForKeyPrefix(search_term, regions_match_name);
375 }
376
377 private:
378 const AddressField field_;
379 Trie<const RegionData*> keys_;
380 Trie<const RegionData*> names_;
381
382 DISALLOW_COPY_AND_ASSIGN(FieldData);
383 };
384
385 // Region data for a country.
386 class CountryData {
387 public:
388 CountryData(const RegionData& country_region,
389 const StringCanonicalizer& shared_canonicalizer)
390 : administrative_area_(ADMIN_AREA),
391 locality_(LOCALITY),
392 dependent_locality_(DEPENDENT_LOCALITY),
393 canonicalizer_(shared_canonicalizer) {
394 DCHECK(!country_region.has_parent());
395
396 if (!country_region.sub_regions().empty())
397 AddSubRegionsOf(country_region, COUNTRY);
398
399 if (!dependent_locality_.empty())
400 min_region_size_ = DEPENDENT_LOCALITY;
401 else if (!locality_.empty())
402 min_region_size_ = LOCALITY;
403 else if (!administrative_area_.empty())
404 min_region_size_ = ADMIN_AREA;
405 else
406 min_region_size_ = COUNTRY;
407 }
408
409 ~CountryData() {}
410
411 void GetSuggestions(const AddressData& user_input,
412 AddressField focused_field,
413 ScopedVector<Suggestion>* results) const {
414 // Do not suggest anything if there's no suggestion data for any fields.
415 if (administrative_area_.empty())
416 return;
417
418 // Do not suggest anything if there's no suggestion data for the focused
419 // field.
420 if (focused_field != POSTAL_CODE && min_region_size_ < focused_field)
421 return;
422
423 // Canonicalized user input data for lookup in the tries.
424 AddressData canonical_input = user_input;
425 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i) {
426 AddressField address_field = static_cast<AddressField>(i);
427 const std::string& field_value = user_input.GetFieldValue(address_field);
428 if (!field_value.empty()) {
429 canonical_input.SetFieldValue(address_field,
430 canonicalizer_.Canonicalize(field_value));
431 }
432 }
433
434 // Non-owned regions that match a field value in user input by region key or
435 // name.
436 std::set<const RegionData*> regions_match_key;
437 std::set<const RegionData*> regions_match_name;
438
439 CountrySuggestions suggestions;
440 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
441 ++i) {
442 AddressField address_field = static_cast<AddressField>(i);
443 AddressField parent_address_field = static_cast<AddressField>(i - 1);
444
445 const std::string& canonical_field_value =
446 canonical_input.GetFieldValue(address_field);
447
448 if (canonical_field_value.empty() &&
449 (address_field == ADMIN_AREA ||
450 canonical_input.GetFieldValue(parent_address_field).empty())) {
451 suggestions.AllRegionsMatchForField(address_field);
452 continue;
453 }
454
455 regions_match_key.clear();
456 regions_match_name.clear();
457
458 GetRegionDataForField(address_field).FindRegionData(
459 canonical_field_value, &regions_match_key, &regions_match_name);
460
461 bool added_suggestions = suggestions.AddRegions(
462 address_field, regions_match_key, regions_match_name);
463
464 // Do not suggest anything if the focused field does not have suggestions.
465 if (address_field == focused_field && !added_suggestions) {
466 return;
467 }
468 }
469
470 suggestions.GetSuggestions(results);
471 }
472
473 private:
474 // Returns the regions for |field|, which should be either ADMIN_AREA,
475 // LOCALITY, or DEPENDENT_LOCALITY.
476 FieldData& GetRegionDataForField(AddressField field) {
477 switch (field) {
478 case ADMIN_AREA:
479 return administrative_area_;
480 case LOCALITY:
481 return locality_;
482 case DEPENDENT_LOCALITY:
483 return dependent_locality_;
484 default:
485 break;
486 }
487 NOTREACHED();
488 return administrative_area_;
489 }
490
491 const FieldData& GetRegionDataForField(AddressField field) const {
492 return const_cast<CountryData*>(this)->GetRegionDataForField(field);
493 }
494
495 void AddSubRegionsOf(const RegionData& parent_region,
496 AddressField parent_field) {
497 DCHECK(!parent_region.sub_regions().empty());
498
499 AddressField field = static_cast<AddressField>(parent_field + 1);
500 DCHECK(field >= ADMIN_AREA);
501 DCHECK(field <= DEPENDENT_LOCALITY);
502
503 FieldData& field_regions = GetRegionDataForField(field);
504 for (std::vector<const RegionData*>::const_iterator it =
505 parent_region.sub_regions().begin();
506 it != parent_region.sub_regions().end();
507 ++it) {
508 const RegionData* region = *it;
509 DCHECK(region);
510
511 field_regions.AddRegionData(canonicalizer_.Canonicalize(region->key()),
512 canonicalizer_.Canonicalize(region->name()),
513 region);
514 if (!region->sub_regions().empty())
515 AddSubRegionsOf(*region, field);
516 }
517 }
518
519 FieldData administrative_area_;
520 FieldData locality_;
521 FieldData dependent_locality_;
522 AddressField min_region_size_;
523 const StringCanonicalizer& canonicalizer_;
524
525 DISALLOW_COPY_AND_ASSIGN(CountryData);
526 };
527
528 } // namespace
529
530 // Region data for all countries.
531 class InputSuggester::AllCountries {
532 public:
533 AllCountries(PreloadSupplier* supplier) : region_data_builder_(supplier) {}
534
535 ~AllCountries() { STLDeleteValues(&regions_); }
536
537 void GetSuggestions(const AddressData& user_input,
538 AddressField focused_field,
539 ScopedVector<Suggestion>* suggestions) {
540 // Lazily initialize the mapping from fields to Trie objects.
541 std::string unused_best_language;
542 const RegionData& region_data =
543 region_data_builder_.Build(user_input.region_code,
544 user_input.language_code,
545 &unused_best_language);
546
547 std::map<const RegionData*, const CountryData*>::iterator country_data_it =
548 regions_.find(&region_data);
549 if (country_data_it == regions_.end()) {
550 country_data_it =
551 regions_.insert(std::make_pair(
552 &region_data,
553 new CountryData(region_data, canonicalizer_)))
554 .first;
555 }
556 DCHECK(country_data_it->second);
557
558 country_data_it->second->GetSuggestions(
559 user_input, focused_field, suggestions);
560 }
561
562 private:
563 // A mapping from a COUNTRY level region to a collection of all of its
564 // sub-regions.
565 std::map<const RegionData*, const CountryData*> regions_;
566
567 // Data source for region data.
568 RegionDataBuilder region_data_builder_;
569
570 // Canonicalizes strings for case and diacritic insensitive search.
571 const StringCanonicalizer canonicalizer_;
572
573 DISALLOW_COPY_AND_ASSIGN(AllCountries);
574 };
575
576 InputSuggester::InputSuggester(PreloadSupplier* supplier)
577 : all_regions_(new AllCountries(supplier)) {
578 }
579
580 InputSuggester::~InputSuggester() {
581 }
582
583 void InputSuggester::GetSuggestions(const AddressData& user_input,
584 AddressField focused_field,
585 size_t suggestions_limit,
586 std::vector<AddressData>* suggestions) {
587 DCHECK(suggestions);
588 DCHECK(focused_field == POSTAL_CODE ||
589 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
590
591 // Do not suggest anything if the user input is empty.
592 if (user_input.IsFieldEmpty(focused_field))
593 return;
594
595 // Build the list of regions that match |user_input| when the user is typing
596 // in the |focused_field|.
597 ScopedVector<Suggestion> suggested_regions;
598 all_regions_->GetSuggestions(user_input, focused_field, &suggested_regions);
599
600 // Generate suggestions based on the regions.
601 for (ScopedVector<Suggestion>::const_iterator suggestion_it =
602 suggested_regions.begin();
603 suggestion_it != suggested_regions.end();
604 ++suggestion_it) {
605 Suggestion* suggested_region = *suggestion_it;
606
607 // Do not add more suggestions than |suggestions_limit|.
608 if (suggestions->size() >= suggestions_limit) {
609 suggestions->clear();
610 return;
611 }
612
613 AddressData address;
614 address.region_code = user_input.region_code;
615 address.postal_code = user_input.postal_code;
616
617 // Traverse the tree of regions from the most specific |region| to the
618 // country-wide "root" of the tree. Use the region names found at each of
619 // the levels of the ruleset tree to build the |suggestion|.
620 AddressField address_field = suggested_region->matching_address_field();
621 for (const RegionData* region = &suggested_region->region_to_suggest();
622 region->has_parent();
623 region = &region->parent()) {
624 address.SetFieldValue(address_field,
625 suggested_region->region_key_matches()
626 ? region->key()
627 : region->name());
628 address_field = static_cast<AddressField>(address_field - 1);
629 }
630
631 suggestions->push_back(address);
632 }
633 }
634
635 } // namespace autofill
OLDNEW
« no previous file with comments | « third_party/libaddressinput/chromium/input_suggester.h ('k') | third_party/libaddressinput/chromium/json.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698