Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(311)

Side by Side Diff: third_party/libaddressinput/chromium/input_suggester.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Validate required fields without rules. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "third_party/icu/source/common/unicode/errorcode.h"
15 #include "third_party/icu/source/common/unicode/locid.h"
16 #include "third_party/icu/source/common/unicode/unistr.h"
17 #include "third_party/icu/source/common/unicode/utypes.h"
18 #include "third_party/icu/source/i18n/unicode/coll.h"
19 #include "third_party/libaddressinput/chromium/trie.h"
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da ta.h"
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su pplier.h"
22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a.h"
23 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a_builder.h"
24
25 namespace autofill {
26
27 using ::i18n::addressinput::AddressData;
28 using ::i18n::addressinput::AddressField;
29 using ::i18n::addressinput::ADMIN_AREA;
30 using ::i18n::addressinput::COUNTRY;
31 using ::i18n::addressinput::DEPENDENT_LOCALITY;
32 using ::i18n::addressinput::POSTAL_CODE;
33 using ::i18n::addressinput::PreloadSupplier;
34 using ::i18n::addressinput::RegionData;
35 using ::i18n::addressinput::RegionDataBuilder;
36
37 namespace {
38
39 // The types of fields that describe a region.
40 enum RegionField {
41 KEY,
42 NAME,
43 };
44
45 // A bitset used for RegionField enum.
46 typedef int Bitset;
47
48 // A type to store a set of pointers to RegionData objects.
49 typedef std::set<const RegionData*> RegionContainer;
50
51 // Returns bitset with |bit| already set.
52 Bitset BitsetFromBit(int bit) {
53 return 1 << bit;
54 }
55
56 // Returns a union of the bitests |a| and |b|.
57 Bitset AddBitsets(Bitset a, Bitset b) {
58 return a | b;
59 }
60
61 // Returns true if |bit| is set in the |bitset|.
62 bool TestBit(Bitset bitset, int bit) {
Evan Stade 2014/06/19 00:38:00 this is not the idiomatic way to deal with bitmask
please use gerrit instead 2014/06/20 21:09:15 Got rid of bitmaps all together.
63 return !!(bitset & (1 << bit));
64 }
65
66 // If |KEY| bit is set in the |value| bitset, then returns the key of the
67 // region. If |NAME| bit is set in the |value| bitset, then returns the name of
68 // the region. The |value| bitset should not be empty.
69 const std::string& GetValueFromRegion(const RegionData* region, Bitset value) {
70 DCHECK(region);
71
72 if (TestBit(value, KEY))
73 return region->key();
74 else if (TestBit(value, NAME))
75 return region->name();
76 else
77 NOTREACHED();
78
79 return region->key();
80 }
81
82 // Collects regions based on whether they have a parent in the given list.
83 class ParentedRegionCollector {
Evan Stade 2014/06/19 00:38:00 you created this entire class so you could use std
please use gerrit instead 2014/06/20 21:09:15 Removed and came up with a better implementation.
84 public:
85 // Retains a reference to both of the parameters. Does not make a copy of
86 // |parent_regions|. Does not take ownership of |regions_with_parents|. The
87 // |regions_with_parents| parameter should not be NULL.
88 ParentedRegionCollector(const RegionContainer& parent_regions,
89 RegionContainer* regions_with_parents)
90 : parent_regions_(parent_regions),
91 regions_with_parents_(regions_with_parents) {
92 DCHECK(regions_with_parents_);
93 }
94
95 ~ParentedRegionCollector() {}
96
97 // Adds |region_to_test| to the |regions_with_parents_| collection, if the
98 // given region has a parent in |parent_regions_|. The |region_to_test|
99 // parameter should not be NULL.
100 void operator()(const RegionData* region_to_test) {
101 DCHECK(region_to_test);
102 if (parent_regions_.find(&region_to_test->parent()) !=
103 parent_regions_.end()) {
104 regions_with_parents_->insert(region_to_test);
105 }
106 }
107
108 private:
109 const RegionContainer& parent_regions_;
110 RegionContainer* regions_with_parents_;
111 };
112
113 // A region and its metadata useful for constructing a suggestion.
114 struct SuggestionRegion {
115 SuggestionRegion(const RegionData* region,
116 Bitset region_fields,
117 AddressField address_field)
118 : region(region),
119 region_fields(region_fields),
120 address_field(address_field) {}
121
122 ~SuggestionRegion() {}
123
124 // Not owned region data.
125 const RegionData* region;
126
127 // A bitset of region fields of the |region|.
128 Bitset region_fields;
129
130 // The address field of the |region|.
131 AddressField address_field;
132 };
133
134 // Removes duplicates based on RegionData and sums up the region field bitsets
135 // of the duplicates.
136 void RemoveDuplicateRegions(std::vector<SuggestionRegion>* regions) {
137 DCHECK(regions);
138
139 std::map<const RegionData*, SuggestionRegion*> uniques;
140 for (std::vector<SuggestionRegion>::iterator region_it = regions->begin();
141 region_it != regions->end(); ++region_it) {
142 SuggestionRegion* lookup = &(*region_it);
143 std::map<const RegionData*, SuggestionRegion*>::iterator unique_it =
144 uniques.find(lookup->region);
145 if (unique_it == uniques.end())
146 unique_it = uniques.insert(std::make_pair(lookup->region, lookup)).first;
147 SuggestionRegion* target = unique_it->second;
148 target->region_fields =
149 AddBitsets(target->region_fields, lookup->region_fields);
150 }
151
152 std::vector<SuggestionRegion> result;
153 for (std::map<const RegionData*, SuggestionRegion*>::const_iterator
154 unique_it = uniques.begin();
155 unique_it != uniques.end(); ++unique_it) {
156 result.push_back(*unique_it->second);
157 }
158 regions->swap(result);
159 }
160
161 } // namespace
162
163 // Builds and caches tries from region data.
164 class InputSuggester::TrieCollection {
165 public:
166 TrieCollection(PreloadSupplier* supplier)
167 : region_data_builder_(supplier) {
168 UErrorCode error_code = U_ZERO_ERROR;
169 collator_.reset(
170 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
171 DCHECK(U_SUCCESS(error_code));
172 collator_->setStrength(icu::Collator::PRIMARY);
173 }
174
175 ~TrieCollection() {
176 // Delete the maps and Trie objects owned by |tries_| field.
177 for (RegionMap::const_iterator region_it = tries_.begin();
178 region_it != tries_.end(); ++region_it) {
179 FieldMap* field_map = region_it->second;
180 DCHECK(field_map);
181
182 for (FieldMap::const_iterator field_it = field_map->begin();
183 field_it != field_map->end(); ++field_it) {
184 IdMap* id_map = field_it->second;
185 DCHECK(id_map);
186
187 for (IdMap::const_iterator id_it = id_map->begin();
188 id_it != id_map->end(); ++id_it) {
189 RegionDataTrie* trie = id_it->second;
190 delete trie;
191 }
192
193 delete id_map;
194 }
195
196 delete field_map;
197 }
198 }
199
200 // Builds a list of regions that can be suggested as completions for
201 // |user_input| when the user is typing into |focused_field|.
202 std::vector<SuggestionRegion> BuildSuggestionRegions(
203 const AddressData& user_input,
204 AddressField focused_field) {
205 std::vector<SuggestionRegion> result;
206
207 // Lazily initialize the mapping from fields to Trie objects.
208 std::string unused_best_language;
209 const RegionData& region_data = region_data_builder_.Build(
210 user_input.region_code, user_input.language_code,
211 &unused_best_language);
212 RegionMap::iterator tries_it = tries_.find(&region_data);
213 if (tries_it == tries_.end()) {
214 tries_it = tries_.insert(
215 std::make_pair(&region_data, new FieldMap)).first;
216 if (!region_data.sub_regions().empty())
217 AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second);
218 }
219 const FieldMap& field_map = *(tries_it->second);
220
221 // Do not suggest anything if there's no suggestion data for any fields.
222 if (field_map.empty())
223 return result;
224
225 // Do not suggest anything if there's no suggestion data for the focused
226 // field.
227 if (focused_field != POSTAL_CODE &&
228 field_map.find(focused_field) == field_map.end()) {
229 return result;
230 }
231
232 // Calculate the most specific field with suggestion data.
233 AddressField deepest_field_with_data = DEPENDENT_LOCALITY;
234 while (field_map.find(deepest_field_with_data) == field_map.end()) {
235 deepest_field_with_data =
236 static_cast<AddressField>(deepest_field_with_data - 1);
237 }
238
239 // Determine the most specific address field that can be suggested.
240 AddressField most_specific_field = focused_field != POSTAL_CODE
241 ? focused_field : DEPENDENT_LOCALITY;
242 if (most_specific_field > deepest_field_with_data) {
243 most_specific_field = deepest_field_with_data;
244 }
245 if (focused_field != POSTAL_CODE) {
246 while (user_input.GetFieldValue(most_specific_field).empty() &&
247 most_specific_field > ADMIN_AREA) {
248 most_specific_field =
249 static_cast<AddressField>(most_specific_field - 1);
250 }
251 }
252
253 // Canonicalized user input data for lookup in the tries.
254 AddressData canonical_input = user_input;
255 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) {
256 AddressField address_field = static_cast<AddressField>(i);
257 canonical_input.SetFieldValue(
258 address_field,
259 CanonicalizeString(user_input.GetFieldValue(address_field)));
260 }
261
262 AddressField suggestion_field = ADMIN_AREA;
263
264 typedef std::map<RegionField, RegionContainer*> IdRegionContainer;
265 typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer;
266 FieldRegionContainer matching_regions;
267
268 IdRegionContainer* parent_id_regions = NULL;
269 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) {
270 AddressField address_field = static_cast<AddressField>(i);
271
272 IdRegionContainer* id_regions = new IdRegionContainer;
273 matching_regions.insert(std::make_pair(address_field, id_regions));
274
275 FieldMap::const_iterator field_map_it = field_map.find(address_field);
276 DCHECK(field_map_it != field_map.end());
277 const IdMap* id_map = field_map_it->second;
278 DCHECK(id_map);
279
280 for (IdMap::const_iterator id_map_it = id_map->begin();
281 id_map_it != id_map->end(); ++id_map_it) {
282 RegionField id = id_map_it->first;
283
284 RegionContainer* regions = new RegionContainer;
285 id_regions->insert(std::make_pair(id, regions));
286
287 const RegionDataTrie* trie = id_map_it->second;
288 DCHECK(trie);
289
290 trie->FindDataForKeyPrefix(
291 canonical_input.GetFieldValue(address_field), regions);
292
293 // Filter out the regions whose parents do not match the user input.
294 if (address_field > ADMIN_AREA) {
295 DCHECK(parent_id_regions);
296
297 IdRegionContainer::const_iterator parent_id_regions_it =
298 parent_id_regions->find(id);
299 DCHECK(parent_id_regions_it != parent_id_regions->end());
300
301 RegionContainer* parent_regions = parent_id_regions_it->second;
302 DCHECK(parent_regions);
303
304 RegionContainer regions_with_parents;
305 std::for_each(regions->begin(), regions->end(),
306 ParentedRegionCollector(
307 *parent_regions, &regions_with_parents));
308 regions->swap(regions_with_parents);
309 }
310
311 for (RegionContainer::const_iterator region_it = regions->begin();
312 region_it != regions->end(); ++region_it) {
313 if (address_field > suggestion_field) {
314 suggestion_field = address_field;
315 result.clear();
316 }
317 result.push_back(
318 SuggestionRegion(*region_it, BitsetFromBit(id), address_field));
319 }
320 }
321
322 parent_id_regions = id_regions;
323 }
324
325 // Cleanup.
Evan Stade 2014/06/19 00:38:00 please do not try to handle bare pointers, use stl
please use gerrit instead 2014/06/20 21:09:15 Done.
326 for (FieldRegionContainer::const_iterator
327 field_it = matching_regions.begin();
328 field_it != matching_regions.end(); ++field_it) {
329 IdRegionContainer* id_regions = field_it->second;
330 DCHECK(id_regions);
331 for (IdRegionContainer::const_iterator id_it = id_regions->begin();
332 id_it != id_regions->end(); ++id_it) {
333 RegionContainer* regions = id_it->second;
334 delete regions;
335 }
336 delete id_regions;
337 }
338
339 RemoveDuplicateRegions(&result);
340 return result;
341 }
342
343 private:
344 typedef Trie<const RegionData*> RegionDataTrie;
345 typedef std::map<RegionField, RegionDataTrie*> IdMap;
Evan Stade 2014/06/19 00:38:00 serious? you need a map for this? RegionField only
please use gerrit instead 2014/06/20 21:09:15 Switched to structs and classes.
346 typedef std::map<AddressField, IdMap*> FieldMap;
347 typedef std::map<const RegionData*, FieldMap*> RegionMap;
348
349 // Recursively builds tries from subregions of |parent| and add these tries to
350 // the |field_map|.
351 void AddTriesForSubRegionsOf(const RegionData& parent,
352 AddressField parent_field,
353 FieldMap* field_map) {
354 DCHECK(!parent.sub_regions().empty());
355 DCHECK(field_map);
356
357 AddressField field = static_cast<AddressField>(parent_field + 1);
358 DCHECK_GE(field, ADMIN_AREA);
359 DCHECK_LE(field, DEPENDENT_LOCALITY);
360
361 FieldMap::iterator field_it = field_map->find(field);
362 if (field_it == field_map->end())
363 field_it = field_map->insert(std::make_pair(field, new IdMap)).first;
364
365 IdMap* id_map = field_it->second;
366 DCHECK(id_map);
367
368 IdMap::iterator key_it = id_map->find(KEY);
369 if (key_it == id_map->end())
370 key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first;
371 RegionDataTrie* key_trie = key_it->second;
372 DCHECK(key_trie);
373
374 IdMap::iterator name_it = id_map->find(NAME);
375 if (name_it == id_map->end())
376 name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first;
377 RegionDataTrie* name_trie = name_it->second;
378 DCHECK(name_trie);
379
380 for (std::vector<const RegionData*>::const_iterator region_it =
381 parent.sub_regions().begin();
382 region_it != parent.sub_regions().end(); ++region_it) {
383 const RegionData* region = *region_it;
384 DCHECK(region);
385
386 key_trie->AddDataForKey(CanonicalizeString(region->key()), region);
387
388 if (!region->name().empty()) {
389 name_trie->AddDataForKey(CanonicalizeString(region->name()), region);
390 }
391
392 if (!region->sub_regions().empty())
393 AddTriesForSubRegionsOf(*region, field, field_map);
394 }
395 }
396
397 // Returns a canonical version of the string that can be used for comparing
398 // strings regardless of diacritics and capitalization.
399 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
400 // CanonicalizeString("Texas") == CanonicalizeString("teXas");
401 // CanonicalizeString("Texas") != CanonicalizeString("California");
402 //
403 // The output is not human-readable.
404 // CanonicalizeString("Texas") != "Texas";
405 std::string CanonicalizeString(const std::string& original) const {
406 icu::UnicodeString icu_str(
407 original.c_str(), static_cast<int32_t>(original.length()));
408 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
409 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
410 DCHECK(buffer.get());
411 int32_t filled_size =
412 collator_->getSortKey(icu_str, buffer.get(), buffer_size);
413 DCHECK_EQ(buffer_size, filled_size);
414 return std::string(reinterpret_cast<const char*>(buffer.get()));
415 }
416
417 // A mapping of the country-level RegionData objects to a collection of Trie
418 // objects. Tries and maps are owned. RegionDatas are not owned.
419 RegionMap tries_;
420
421 // Data source for region data.
422 RegionDataBuilder region_data_builder_;
423
424 // Canonicalizes strings for case and diacritic insensitive search.
425 scoped_ptr<icu::Collator> collator_;
426
427 DISALLOW_COPY_AND_ASSIGN(TrieCollection);
428 };
429
430 InputSuggester::InputSuggester(PreloadSupplier* supplier)
431 : trie_collection_(new TrieCollection(supplier)) {}
432
433 InputSuggester::~InputSuggester() {}
434
435 void InputSuggester::GetSuggestions(const AddressData& user_input,
436 AddressField focused_field,
437 size_t suggestions_limit,
438 std::vector<AddressData>* suggestions) {
439 DCHECK(suggestions);
440 DCHECK(focused_field == POSTAL_CODE ||
441 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
442
443 // Do not suggest anything if the user input is empty.
444 if (user_input.IsFieldEmpty(focused_field))
445 return;
446
447 // Build the list of regions that match |user_input| when the user is typing
448 // in the |focused_field|.
449 const std::vector<SuggestionRegion>& regions =
450 trie_collection_->BuildSuggestionRegions(user_input, focused_field);
451
452 // Generate suggestions based on the regions.
453 for (std::vector<SuggestionRegion>::const_iterator
454 region_it = regions.begin();
455 region_it != regions.end(); ++region_it) {
456 // Do not add more suggestions than |suggestions_limit|.
457 if (suggestions->size() >= suggestions_limit) {
458 suggestions->clear();
459 return;
460 }
461
462 AddressData suggestion;
463 suggestion.region_code = user_input.region_code;
464 suggestion.postal_code = user_input.postal_code;
465
466 // Traverse the tree of regions from the most specific |region| to the
467 // country-wide "root" of the tree. Use the region names found at each of
468 // the levels of the ruleset tree to build the |suggestion|.
469 DCHECK(region_it->region);
470 AddressField address_field = region_it->address_field;
471 for (const RegionData* suggestion_region = region_it->region;
472 suggestion_region->has_parent();
473 suggestion_region = &suggestion_region->parent()) {
474 suggestion.SetFieldValue(
475 address_field,
476 GetValueFromRegion(suggestion_region, region_it->region_fields));
477 address_field = static_cast<AddressField>(address_field - 1);
478 }
479
480 suggestions->push_back(suggestion);
481 }
482 }
483
484 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698