Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(589)

Side by Side Diff: third_party/libaddressinput/chromium/suggestions.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Work in progress for suggestions impl. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/suggestions.h"
6
7 #include <map>
8 #include <set>
9 #include <utility>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "third_party/icu/source/common/unicode/errorcode.h"
15 #include "third_party/icu/source/common/unicode/locid.h"
16 #include "third_party/icu/source/common/unicode/unistr.h"
17 #include "third_party/icu/source/common/unicode/utypes.h"
18 #include "third_party/icu/source/i18n/unicode/coll.h"
19 #include "third_party/libaddressinput/chromium/trie.h"
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da ta.h"
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su pplier.h"
22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a.h"
23 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a_builder.h"
24
25 namespace autofill {
26
27 using ::i18n::addressinput::AddressData;
28 using ::i18n::addressinput::AddressField;
29 using ::i18n::addressinput::ADMIN_AREA;
30 using ::i18n::addressinput::COUNTRY;
31 using ::i18n::addressinput::DEPENDENT_LOCALITY;
32 using ::i18n::addressinput::POSTAL_CODE;
33 using ::i18n::addressinput::PreloadSupplier;
34 using ::i18n::addressinput::RegionData;
35 using ::i18n::addressinput::RegionDataBuilder;
36
37 namespace {
38
39 // The types of fields that describe a region.
40 enum RegionField {
41 KEY,
42 NAME,
43 };
44
45 // A bitset used for RegionField enum.
46 typedef int Bitset;
47
48 // A type to store a set of pointers to RegionData objects.
49 typedef std::set<const RegionData*> RegionContainer;
50
51 // Returns an empty bitset.
52 Bitset EmptyBitset() {
53 return 0;
54 }
55
56 // Returns bitset with |bit| already set.
57 Bitset BitsetFromBit(int bit) {
58 return 1 << bit;
59 }
60
61 // Sets the |bit| on |bitset|.
62 Bitset SetBit(Bitset bitset, int bit) {
63 return bitset | (1 << bit);
64 }
65
66 // Returns true if |bit| is set in the |bitset|.
67 bool TestBit(Bitset bitset, int bit) {
68 return !!(bitset & (1 << bit));
69 }
70
71 Bitset AddBitsets(Bitset a, Bitset b) {
72 return a | b;
73 }
74
75 // If |KEY| bit is set in the |value| bitset, then returns the key of the
76 // region. If |NAME| bit is set in the |value| bitset, then returns the name of
77 // the region. The |value| bitset should not be empty.
78 const std::string& GetValueFromRegion(const RegionData* region, Bitset value) {
79 DCHECK(region);
80
81 if (TestBit(value, KEY))
82 return region->key();
83 else if (TestBit(value, NAME))
84 return region->name();
85 else
86 NOTREACHED();
87
88 return region->key();
89 }
90
91 // Collects regions based on whether they have a parent in the given list.
92 class ParentedRegionCollector {
93 public:
94 // Retains a reference to both of the parameters. Does not make a copy of
95 // |parent_regions|. Does not take ownership of |regions_with_parents|. The
96 // |regions_with_parents| parameter should not be NULL.
97 ParentedRegionCollector(const RegionContainer& parent_regions,
98 RegionContainer* regions_with_parents)
99 : parent_regions_(parent_regions),
100 regions_with_parents_(regions_with_parents) {
101 DCHECK(regions_with_parents_);
102 }
103
104 ~ParentedRegionCollector() {}
105
106 // Adds |region_to_test| to the |regions_with_parents_| collection, if the
107 // given region has a parent in |parent_regions_|. The |region_to_test|
108 // parameter should not be NULL.
109 void operator()(const RegionData* region_to_test) {
110 DCHECK(region_to_test);
111 if (parent_regions_.find(&region_to_test->parent()) !=
112 parent_regions_.end()) {
113 regions_with_parents_->insert(region_to_test);
114 }
115 }
116
117 private:
118 const RegionContainer& parent_regions_;
119 RegionContainer* regions_with_parents_;
120 };
121
122 // A region and its metadata useful for constructing a suggestion.
123 struct SuggestionRegion {
124 SuggestionRegion(const RegionData* region,
125 Bitset region_fields,
126 AddressField address_field)
127 : region(region),
128 region_fields(region_fields),
129 address_field(address_field) {}
130
131 ~SuggestionRegion() {}
132
133 // Not owned region data.
134 const RegionData* region;
135
136 // A bitset of region fields of the |region|.
137 Bitset region_fields;
138
139 // The address field of the |region|.
140 AddressField address_field;
141 };
142
143 // Removes duplicates based on RegionData and sums up the region field bitsets
144 // of the duplicates.
145 void RemoveDuplicateRegions(std::vector<SuggestionRegion>* regions) {
146 DCHECK(regions);
147
148 std::map<const RegionData*, SuggestionRegion*> uniques;
149 for (std::vector<SuggestionRegion>::iterator region_it = regions->begin();
150 region_it != regions->end(); ++region_it) {
151 SuggestionRegion* lookup = &(*region_it);
152 std::map<const RegionData*, SuggestionRegion*>::iterator unique_it =
153 uniques.find(lookup->region);
154 if (unique_it == uniques.end())
155 unique_it = uniques.insert(std::make_pair(lookup->region, lookup)).first;
156 SuggestionRegion* target = unique_it->second;
157 target->region_fields =
158 AddBitsets(target->region_fields, lookup->region_fields);
159 }
160
161 std::vector<SuggestionRegion> result;
162 for (std::map<const RegionData*, SuggestionRegion*>::const_iterator
163 unique_it = uniques.begin();
164 unique_it != uniques.end(); ++unique_it) {
165 result.push_back(*unique_it->second);
166 }
167 regions->swap(result);
168 }
169
170 } // namespace
171
172 // Builds and caches tries from region data.
173 class Suggestions::TrieCollection {
174 public:
175 TrieCollection(PreloadSupplier* supplier)
176 : region_data_builder_(supplier) {
177 UErrorCode error_code = U_ZERO_ERROR;
178 collator_.reset(
179 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
180 DCHECK(U_SUCCESS(error_code));
181 collator_->setStrength(icu::Collator::PRIMARY);
182 }
183
184 ~TrieCollection() {
185 // Delete the maps and Trie objects owned by |tries_| field.
186 for (RegionMap::const_iterator region_it = tries_.begin();
187 region_it != tries_.end(); ++region_it) {
188 FieldMap* field_map = region_it->second;
189 DCHECK(field_map);
190
191 for (FieldMap::const_iterator field_it = field_map->begin();
192 field_it != field_map->end(); ++field_it) {
193 IdMap* id_map = field_it->second;
194 DCHECK(id_map);
195
196 for (IdMap::const_iterator id_it = id_map->begin();
197 id_it != id_map->end(); ++id_it) {
198 RegionDataTrie* trie = id_it->second;
199 delete trie;
200 }
201
202 delete id_map;
203 }
204
205 delete field_map;
206 }
207 }
208
209 // Builds a list of regions that can be suggested as completions for
210 // |user_input| when the user is typing into |focused_field|.
211 std::vector<SuggestionRegion> BuildSuggestionRegions(
212 const AddressData& user_input,
213 AddressField focused_field) {
214 std::vector<SuggestionRegion> result;
215
216 // Lazily initialize the mapping from fields to Trie objects.
217 std::string unused_best_language;
218 const RegionData& region_data = region_data_builder_.Build(
219 user_input.region_code, user_input.language_code,
220 &unused_best_language);
221 RegionMap::iterator tries_it = tries_.find(&region_data);
222 if (tries_it == tries_.end()) {
223 tries_it = tries_.insert(
224 std::make_pair(&region_data, new FieldMap)).first;
225 if (!region_data.sub_regions().empty())
226 AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second);
227 }
228 const FieldMap& field_map = *(tries_it->second);
229
230 // Do not suggest anything if there's no suggestion data for any fields.
231 if (field_map.empty())
232 return result;
233
234 // Do not suggest anything if there's no suggestion data for the focused
235 // field.
236 if (focused_field != POSTAL_CODE &&
237 field_map.find(focused_field) == field_map.end()) {
238 return result;
239 }
240
241 // Calculate the most specific field with suggestion data.
242 AddressField deepest_field_with_data = DEPENDENT_LOCALITY;
243 while (field_map.find(deepest_field_with_data) == field_map.end()) {
244 deepest_field_with_data
245 = static_cast<AddressField>(deepest_field_with_data - 1);
246 }
247
248 // Determine the most specific address field that can be suggested.
249 AddressField most_specific_field = focused_field != POSTAL_CODE
250 ? focused_field : DEPENDENT_LOCALITY;
251 if (most_specific_field > deepest_field_with_data) {
252 most_specific_field = deepest_field_with_data;
253 }
254 if (focused_field != POSTAL_CODE) {
255 while (user_input.GetFieldValue(most_specific_field).empty() &&
256 most_specific_field > ADMIN_AREA) {
257 most_specific_field =
258 static_cast<AddressField>(most_specific_field - 1);
259 }
260 }
261
262 // Canonicalized user input data for lookup in the tries.
263 AddressData canonical_input = user_input;
264 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) {
265 AddressField address_field = static_cast<AddressField>(i);
266 canonical_input.SetFieldValue(
267 address_field,
268 CanonicalizeString(user_input.GetFieldValue(address_field)));
269 }
270
271 AddressField suggestion_field = ADMIN_AREA;
272
273 typedef std::map<RegionField, RegionContainer*> IdRegionContainer;
274 typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer;
275 FieldRegionContainer matching_regions;
276
277 IdRegionContainer* parent_id_regions = NULL;
278 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) {
279 AddressField address_field = static_cast<AddressField>(i);
280
281 IdRegionContainer* id_regions = new IdRegionContainer;
282 matching_regions.insert(std::make_pair(address_field, id_regions));
283
284 FieldMap::const_iterator field_map_it = field_map.find(address_field);
285 DCHECK(field_map_it != field_map.end());
286 const IdMap* id_map = field_map_it->second;
287 DCHECK(id_map);
288
289 for (IdMap::const_iterator id_map_it = id_map->begin();
290 id_map_it != id_map->end(); ++id_map_it) {
291 RegionField id = id_map_it->first;
292
293 RegionContainer* regions = new RegionContainer;
294 id_regions->insert(std::make_pair(id, regions));
295
296 const RegionDataTrie* trie = id_map_it->second;
297 DCHECK(trie);
298
299 trie->FindDataForKeyPrefix(
300 canonical_input.GetFieldValue(address_field), regions);
301
302 // Filter out the regions whose parents do not match the user input.
303 if (address_field > ADMIN_AREA) {
304 DCHECK(parent_id_regions);
305
306 IdRegionContainer::const_iterator parent_id_regions_it =
307 parent_id_regions->find(id);
308 DCHECK(parent_id_regions_it != parent_id_regions->end());
309
310 RegionContainer* parent_regions = parent_id_regions_it->second;
311 DCHECK(parent_regions);
312
313 RegionContainer regions_with_parents;
314 std::for_each(regions->begin(), regions->end(),
315 ParentedRegionCollector(
316 *parent_regions, &regions_with_parents));
317 regions->swap(regions_with_parents);
318 }
319
320 for (RegionContainer::const_iterator region_it = regions->begin();
321 region_it != regions->end(); ++region_it) {
322 if (address_field > suggestion_field) {
323 suggestion_field = address_field;
324 result.clear();
325 }
326 result.push_back(
327 SuggestionRegion(*region_it, BitsetFromBit(id), address_field));
328 }
329 }
330
331 parent_id_regions = id_regions;
332 }
333
334 // Cleanup.
335 for (FieldRegionContainer::const_iterator
336 field_it = matching_regions.begin();
337 field_it != matching_regions.end(); ++field_it) {
338 IdRegionContainer* id_regions = field_it->second;
339 DCHECK(id_regions);
340 for (IdRegionContainer::const_iterator id_it = id_regions->begin();
341 id_it != id_regions->end(); ++id_it) {
342 RegionContainer* regions = id_it->second;
343 delete regions;
344 }
345 delete id_regions;
346 }
347
348 RemoveDuplicateRegions(&result);
349 return result;
350 }
351
352 private:
353 typedef Trie<const RegionData*> RegionDataTrie;
354 typedef std::map<RegionField, RegionDataTrie*> IdMap;
355 typedef std::map<AddressField, IdMap*> FieldMap;
356 typedef std::map<const RegionData*, FieldMap*> RegionMap;
357
358 // Recursively builds tries from subregions of |parent| and add these tries to
359 // the |field_map|.
360 void AddTriesForSubRegionsOf(const RegionData& parent,
361 AddressField parent_field,
362 FieldMap* field_map) {
363 DCHECK(!parent.sub_regions().empty());
364 DCHECK(field_map);
365
366 AddressField field = static_cast<AddressField>(parent_field + 1);
367 DCHECK_GE(field, ADMIN_AREA);
368 DCHECK_LE(field, DEPENDENT_LOCALITY);
369
370 FieldMap::iterator field_it = field_map->find(field);
371 if (field_it == field_map->end())
372 field_it = field_map->insert(std::make_pair(field, new IdMap)).first;
373
374 IdMap* id_map = field_it->second;
375 DCHECK(id_map);
376
377 IdMap::iterator key_it = id_map->find(KEY);
378 if (key_it == id_map->end())
379 key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first;
380 RegionDataTrie* key_trie = key_it->second;
381 DCHECK(key_trie);
382
383 IdMap::iterator name_it = id_map->find(NAME);
384 if (name_it == id_map->end())
385 name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first;
386 RegionDataTrie* name_trie = name_it->second;
387 DCHECK(name_trie);
388
389 for (std::vector<const RegionData*>::const_iterator region_it =
390 parent.sub_regions().begin();
391 region_it != parent.sub_regions().end(); ++region_it) {
392 const RegionData* region = *region_it;
393 DCHECK(region);
394
395 key_trie->AddDataForKey(CanonicalizeString(region->key()), region);
396
397 if (!region->name().empty()) {
398 name_trie->AddDataForKey(CanonicalizeString(region->name()), region);
399 }
400
401 if (!region->sub_regions().empty())
402 AddTriesForSubRegionsOf(*region, field, field_map);
403 }
404 }
405
406 // Returns a canonical version of the string that can be used for comparing
407 // strings regardless of diacritics and capitalization.
408 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
409 // CanonicalizeString("Texas") == CanonicalizeString("teXas");
410 // CanonicalizeString("Texas") != CanonicalizeString("California");
411 //
412 // The output is not human-readable.
413 // CanonicalizeString("Texas") != "Texas";
414 std::string CanonicalizeString(const std::string& original) const {
415 icu::UnicodeString icu_str(
416 original.c_str(), static_cast<int32_t>(original.length()));
417 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
418 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
419 DCHECK(buffer.get());
420 int32_t filled_size =
421 collator_->getSortKey(icu_str, buffer.get(), buffer_size);
422 DCHECK_EQ(buffer_size, filled_size);
423 return std::string(reinterpret_cast<const char*>(buffer.get()));
424 }
425
426 // A mapping of the country-level RegionData objects to a collection of Trie
427 // objects. Tries and maps are owned. RegionDatas are not owned.
428 RegionMap tries_;
429
430 // Data source for region data.
431 RegionDataBuilder region_data_builder_;
432
433 // Canonicalizes strings for case and diacritic insensitive search.
434 scoped_ptr<icu::Collator> collator_;
435
436 DISALLOW_COPY_AND_ASSIGN(TrieCollection);
437 };
438
439 Suggestions::Suggestions(PreloadSupplier* supplier)
440 : trie_collection_(new TrieCollection(supplier)) {}
441
442 Suggestions::~Suggestions() {}
443
444 void Suggestions::GetSuggestions(const AddressData& user_input,
445 AddressField focused_field,
446 size_t suggestions_limit,
447 std::vector<AddressData>* suggestions) {
448 DCHECK(suggestions);
449 DCHECK(focused_field == POSTAL_CODE ||
450 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
451
452 // Do not suggest anything if the user input is empty.
453 if (user_input.IsFieldEmpty(focused_field))
454 return;
455
456 // Build the list of regions that match |user_input| when the user is typing
457 // in the |focused_field|.
458 const std::vector<SuggestionRegion>& regions =
459 trie_collection_->BuildSuggestionRegions(user_input, focused_field);
460
461 // Generate suggestions based on the regions.
462 for (std::vector<SuggestionRegion>::const_iterator
463 region_it = regions.begin();
464 region_it != regions.end(); ++region_it) {
465 // Do not add more suggestions than |suggestions_limit|.
466 if (suggestions->size() >= suggestions_limit) {
467 suggestions->clear();
468 return;
469 }
470
471 AddressData suggestion;
472 suggestion.region_code = user_input.region_code;
473 suggestion.postal_code = user_input.postal_code;
474
475 // Traverse the tree of regions from the most specific |region| to the
476 // country-wide "root" of the tree. Use the region names found at each of
477 // the levels of the ruleset tree to build the |suggestion|.
478 DCHECK(region_it->region);
479 AddressField address_field = region_it->address_field;
480 for (const RegionData* suggestion_region = region_it->region;
481 suggestion_region->has_parent();
482 suggestion_region = &suggestion_region->parent()) {
483 suggestion.SetFieldValue(
484 address_field,
485 GetValueFromRegion(suggestion_region, region_it->region_fields));
486 address_field = static_cast<AddressField>(address_field - 1);
487 }
488
489 suggestions->push_back(suggestion);
490 }
491 }
492
493 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698