OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "third_party/libaddressinput/chromium/input_suggester.h" | |
6 | |
7 #include <map> | |
8 #include <set> | |
9 #include <utility> | |
10 | |
11 #include "base/basictypes.h" | |
12 #include "base/logging.h" | |
13 #include "base/memory/scoped_ptr.h" | |
14 #include "third_party/icu/source/common/unicode/errorcode.h" | |
15 #include "third_party/icu/source/common/unicode/locid.h" | |
16 #include "third_party/icu/source/common/unicode/unistr.h" | |
17 #include "third_party/icu/source/common/unicode/utypes.h" | |
18 #include "third_party/icu/source/i18n/unicode/coll.h" | |
19 #include "third_party/libaddressinput/chromium/trie.h" | |
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da ta.h" | |
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su pplier.h" | |
22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a.h" | |
23 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a_builder.h" | |
24 | |
25 namespace autofill { | |
26 | |
27 using ::i18n::addressinput::AddressData; | |
28 using ::i18n::addressinput::AddressField; | |
29 using ::i18n::addressinput::ADMIN_AREA; | |
30 using ::i18n::addressinput::COUNTRY; | |
31 using ::i18n::addressinput::DEPENDENT_LOCALITY; | |
32 using ::i18n::addressinput::POSTAL_CODE; | |
33 using ::i18n::addressinput::PreloadSupplier; | |
34 using ::i18n::addressinput::RegionData; | |
35 using ::i18n::addressinput::RegionDataBuilder; | |
36 | |
37 namespace { | |
38 | |
39 // The types of fields that describe a region. | |
40 enum RegionField { | |
41 KEY, | |
42 NAME, | |
43 }; | |
44 | |
45 // A bitset used for RegionField enum. | |
46 typedef int Bitset; | |
47 | |
48 // A type to store a set of pointers to RegionData objects. | |
49 typedef std::set<const RegionData*> RegionContainer; | |
50 | |
51 // Returns bitset with |bit| already set. | |
52 Bitset BitsetFromBit(int bit) { | |
53 return 1 << bit; | |
54 } | |
55 | |
56 // Returns a union of the bitests |a| and |b|. | |
57 Bitset AddBitsets(Bitset a, Bitset b) { | |
58 return a | b; | |
59 } | |
60 | |
61 // Returns true if |bit| is set in the |bitset|. | |
62 bool TestBit(Bitset bitset, int bit) { | |
Evan Stade
2014/06/19 00:38:00
this is not the idiomatic way to deal with bitmask
please use gerrit instead
2014/06/20 21:09:15
Got rid of bitmaps all together.
| |
63 return !!(bitset & (1 << bit)); | |
64 } | |
65 | |
66 // If |KEY| bit is set in the |value| bitset, then returns the key of the | |
67 // region. If |NAME| bit is set in the |value| bitset, then returns the name of | |
68 // the region. The |value| bitset should not be empty. | |
69 const std::string& GetValueFromRegion(const RegionData* region, Bitset value) { | |
70 DCHECK(region); | |
71 | |
72 if (TestBit(value, KEY)) | |
73 return region->key(); | |
74 else if (TestBit(value, NAME)) | |
75 return region->name(); | |
76 else | |
77 NOTREACHED(); | |
78 | |
79 return region->key(); | |
80 } | |
81 | |
82 // Collects regions based on whether they have a parent in the given list. | |
83 class ParentedRegionCollector { | |
Evan Stade
2014/06/19 00:38:00
you created this entire class so you could use std
please use gerrit instead
2014/06/20 21:09:15
Removed and came up with a better implementation.
| |
84 public: | |
85 // Retains a reference to both of the parameters. Does not make a copy of | |
86 // |parent_regions|. Does not take ownership of |regions_with_parents|. The | |
87 // |regions_with_parents| parameter should not be NULL. | |
88 ParentedRegionCollector(const RegionContainer& parent_regions, | |
89 RegionContainer* regions_with_parents) | |
90 : parent_regions_(parent_regions), | |
91 regions_with_parents_(regions_with_parents) { | |
92 DCHECK(regions_with_parents_); | |
93 } | |
94 | |
95 ~ParentedRegionCollector() {} | |
96 | |
97 // Adds |region_to_test| to the |regions_with_parents_| collection, if the | |
98 // given region has a parent in |parent_regions_|. The |region_to_test| | |
99 // parameter should not be NULL. | |
100 void operator()(const RegionData* region_to_test) { | |
101 DCHECK(region_to_test); | |
102 if (parent_regions_.find(®ion_to_test->parent()) != | |
103 parent_regions_.end()) { | |
104 regions_with_parents_->insert(region_to_test); | |
105 } | |
106 } | |
107 | |
108 private: | |
109 const RegionContainer& parent_regions_; | |
110 RegionContainer* regions_with_parents_; | |
111 }; | |
112 | |
113 // A region and its metadata useful for constructing a suggestion. | |
114 struct SuggestionRegion { | |
115 SuggestionRegion(const RegionData* region, | |
116 Bitset region_fields, | |
117 AddressField address_field) | |
118 : region(region), | |
119 region_fields(region_fields), | |
120 address_field(address_field) {} | |
121 | |
122 ~SuggestionRegion() {} | |
123 | |
124 // Not owned region data. | |
125 const RegionData* region; | |
126 | |
127 // A bitset of region fields of the |region|. | |
128 Bitset region_fields; | |
129 | |
130 // The address field of the |region|. | |
131 AddressField address_field; | |
132 }; | |
133 | |
134 // Removes duplicates based on RegionData and sums up the region field bitsets | |
135 // of the duplicates. | |
136 void RemoveDuplicateRegions(std::vector<SuggestionRegion>* regions) { | |
137 DCHECK(regions); | |
138 | |
139 std::map<const RegionData*, SuggestionRegion*> uniques; | |
140 for (std::vector<SuggestionRegion>::iterator region_it = regions->begin(); | |
141 region_it != regions->end(); ++region_it) { | |
142 SuggestionRegion* lookup = &(*region_it); | |
143 std::map<const RegionData*, SuggestionRegion*>::iterator unique_it = | |
144 uniques.find(lookup->region); | |
145 if (unique_it == uniques.end()) | |
146 unique_it = uniques.insert(std::make_pair(lookup->region, lookup)).first; | |
147 SuggestionRegion* target = unique_it->second; | |
148 target->region_fields = | |
149 AddBitsets(target->region_fields, lookup->region_fields); | |
150 } | |
151 | |
152 std::vector<SuggestionRegion> result; | |
153 for (std::map<const RegionData*, SuggestionRegion*>::const_iterator | |
154 unique_it = uniques.begin(); | |
155 unique_it != uniques.end(); ++unique_it) { | |
156 result.push_back(*unique_it->second); | |
157 } | |
158 regions->swap(result); | |
159 } | |
160 | |
161 } // namespace | |
162 | |
163 // Builds and caches tries from region data. | |
164 class InputSuggester::TrieCollection { | |
165 public: | |
166 TrieCollection(PreloadSupplier* supplier) | |
167 : region_data_builder_(supplier) { | |
168 UErrorCode error_code = U_ZERO_ERROR; | |
169 collator_.reset( | |
170 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); | |
171 DCHECK(U_SUCCESS(error_code)); | |
172 collator_->setStrength(icu::Collator::PRIMARY); | |
173 } | |
174 | |
175 ~TrieCollection() { | |
176 // Delete the maps and Trie objects owned by |tries_| field. | |
177 for (RegionMap::const_iterator region_it = tries_.begin(); | |
178 region_it != tries_.end(); ++region_it) { | |
179 FieldMap* field_map = region_it->second; | |
180 DCHECK(field_map); | |
181 | |
182 for (FieldMap::const_iterator field_it = field_map->begin(); | |
183 field_it != field_map->end(); ++field_it) { | |
184 IdMap* id_map = field_it->second; | |
185 DCHECK(id_map); | |
186 | |
187 for (IdMap::const_iterator id_it = id_map->begin(); | |
188 id_it != id_map->end(); ++id_it) { | |
189 RegionDataTrie* trie = id_it->second; | |
190 delete trie; | |
191 } | |
192 | |
193 delete id_map; | |
194 } | |
195 | |
196 delete field_map; | |
197 } | |
198 } | |
199 | |
200 // Builds a list of regions that can be suggested as completions for | |
201 // |user_input| when the user is typing into |focused_field|. | |
202 std::vector<SuggestionRegion> BuildSuggestionRegions( | |
203 const AddressData& user_input, | |
204 AddressField focused_field) { | |
205 std::vector<SuggestionRegion> result; | |
206 | |
207 // Lazily initialize the mapping from fields to Trie objects. | |
208 std::string unused_best_language; | |
209 const RegionData& region_data = region_data_builder_.Build( | |
210 user_input.region_code, user_input.language_code, | |
211 &unused_best_language); | |
212 RegionMap::iterator tries_it = tries_.find(®ion_data); | |
213 if (tries_it == tries_.end()) { | |
214 tries_it = tries_.insert( | |
215 std::make_pair(®ion_data, new FieldMap)).first; | |
216 if (!region_data.sub_regions().empty()) | |
217 AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second); | |
218 } | |
219 const FieldMap& field_map = *(tries_it->second); | |
220 | |
221 // Do not suggest anything if there's no suggestion data for any fields. | |
222 if (field_map.empty()) | |
223 return result; | |
224 | |
225 // Do not suggest anything if there's no suggestion data for the focused | |
226 // field. | |
227 if (focused_field != POSTAL_CODE && | |
228 field_map.find(focused_field) == field_map.end()) { | |
229 return result; | |
230 } | |
231 | |
232 // Calculate the most specific field with suggestion data. | |
233 AddressField deepest_field_with_data = DEPENDENT_LOCALITY; | |
234 while (field_map.find(deepest_field_with_data) == field_map.end()) { | |
235 deepest_field_with_data = | |
236 static_cast<AddressField>(deepest_field_with_data - 1); | |
237 } | |
238 | |
239 // Determine the most specific address field that can be suggested. | |
240 AddressField most_specific_field = focused_field != POSTAL_CODE | |
241 ? focused_field : DEPENDENT_LOCALITY; | |
242 if (most_specific_field > deepest_field_with_data) { | |
243 most_specific_field = deepest_field_with_data; | |
244 } | |
245 if (focused_field != POSTAL_CODE) { | |
246 while (user_input.GetFieldValue(most_specific_field).empty() && | |
247 most_specific_field > ADMIN_AREA) { | |
248 most_specific_field = | |
249 static_cast<AddressField>(most_specific_field - 1); | |
250 } | |
251 } | |
252 | |
253 // Canonicalized user input data for lookup in the tries. | |
254 AddressData canonical_input = user_input; | |
255 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { | |
256 AddressField address_field = static_cast<AddressField>(i); | |
257 canonical_input.SetFieldValue( | |
258 address_field, | |
259 CanonicalizeString(user_input.GetFieldValue(address_field))); | |
260 } | |
261 | |
262 AddressField suggestion_field = ADMIN_AREA; | |
263 | |
264 typedef std::map<RegionField, RegionContainer*> IdRegionContainer; | |
265 typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer; | |
266 FieldRegionContainer matching_regions; | |
267 | |
268 IdRegionContainer* parent_id_regions = NULL; | |
269 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) { | |
270 AddressField address_field = static_cast<AddressField>(i); | |
271 | |
272 IdRegionContainer* id_regions = new IdRegionContainer; | |
273 matching_regions.insert(std::make_pair(address_field, id_regions)); | |
274 | |
275 FieldMap::const_iterator field_map_it = field_map.find(address_field); | |
276 DCHECK(field_map_it != field_map.end()); | |
277 const IdMap* id_map = field_map_it->second; | |
278 DCHECK(id_map); | |
279 | |
280 for (IdMap::const_iterator id_map_it = id_map->begin(); | |
281 id_map_it != id_map->end(); ++id_map_it) { | |
282 RegionField id = id_map_it->first; | |
283 | |
284 RegionContainer* regions = new RegionContainer; | |
285 id_regions->insert(std::make_pair(id, regions)); | |
286 | |
287 const RegionDataTrie* trie = id_map_it->second; | |
288 DCHECK(trie); | |
289 | |
290 trie->FindDataForKeyPrefix( | |
291 canonical_input.GetFieldValue(address_field), regions); | |
292 | |
293 // Filter out the regions whose parents do not match the user input. | |
294 if (address_field > ADMIN_AREA) { | |
295 DCHECK(parent_id_regions); | |
296 | |
297 IdRegionContainer::const_iterator parent_id_regions_it = | |
298 parent_id_regions->find(id); | |
299 DCHECK(parent_id_regions_it != parent_id_regions->end()); | |
300 | |
301 RegionContainer* parent_regions = parent_id_regions_it->second; | |
302 DCHECK(parent_regions); | |
303 | |
304 RegionContainer regions_with_parents; | |
305 std::for_each(regions->begin(), regions->end(), | |
306 ParentedRegionCollector( | |
307 *parent_regions, ®ions_with_parents)); | |
308 regions->swap(regions_with_parents); | |
309 } | |
310 | |
311 for (RegionContainer::const_iterator region_it = regions->begin(); | |
312 region_it != regions->end(); ++region_it) { | |
313 if (address_field > suggestion_field) { | |
314 suggestion_field = address_field; | |
315 result.clear(); | |
316 } | |
317 result.push_back( | |
318 SuggestionRegion(*region_it, BitsetFromBit(id), address_field)); | |
319 } | |
320 } | |
321 | |
322 parent_id_regions = id_regions; | |
323 } | |
324 | |
325 // Cleanup. | |
Evan Stade
2014/06/19 00:38:00
please do not try to handle bare pointers, use stl
please use gerrit instead
2014/06/20 21:09:15
Done.
| |
326 for (FieldRegionContainer::const_iterator | |
327 field_it = matching_regions.begin(); | |
328 field_it != matching_regions.end(); ++field_it) { | |
329 IdRegionContainer* id_regions = field_it->second; | |
330 DCHECK(id_regions); | |
331 for (IdRegionContainer::const_iterator id_it = id_regions->begin(); | |
332 id_it != id_regions->end(); ++id_it) { | |
333 RegionContainer* regions = id_it->second; | |
334 delete regions; | |
335 } | |
336 delete id_regions; | |
337 } | |
338 | |
339 RemoveDuplicateRegions(&result); | |
340 return result; | |
341 } | |
342 | |
343 private: | |
344 typedef Trie<const RegionData*> RegionDataTrie; | |
345 typedef std::map<RegionField, RegionDataTrie*> IdMap; | |
Evan Stade
2014/06/19 00:38:00
serious? you need a map for this? RegionField only
please use gerrit instead
2014/06/20 21:09:15
Switched to structs and classes.
| |
346 typedef std::map<AddressField, IdMap*> FieldMap; | |
347 typedef std::map<const RegionData*, FieldMap*> RegionMap; | |
348 | |
349 // Recursively builds tries from subregions of |parent| and add these tries to | |
350 // the |field_map|. | |
351 void AddTriesForSubRegionsOf(const RegionData& parent, | |
352 AddressField parent_field, | |
353 FieldMap* field_map) { | |
354 DCHECK(!parent.sub_regions().empty()); | |
355 DCHECK(field_map); | |
356 | |
357 AddressField field = static_cast<AddressField>(parent_field + 1); | |
358 DCHECK_GE(field, ADMIN_AREA); | |
359 DCHECK_LE(field, DEPENDENT_LOCALITY); | |
360 | |
361 FieldMap::iterator field_it = field_map->find(field); | |
362 if (field_it == field_map->end()) | |
363 field_it = field_map->insert(std::make_pair(field, new IdMap)).first; | |
364 | |
365 IdMap* id_map = field_it->second; | |
366 DCHECK(id_map); | |
367 | |
368 IdMap::iterator key_it = id_map->find(KEY); | |
369 if (key_it == id_map->end()) | |
370 key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first; | |
371 RegionDataTrie* key_trie = key_it->second; | |
372 DCHECK(key_trie); | |
373 | |
374 IdMap::iterator name_it = id_map->find(NAME); | |
375 if (name_it == id_map->end()) | |
376 name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first; | |
377 RegionDataTrie* name_trie = name_it->second; | |
378 DCHECK(name_trie); | |
379 | |
380 for (std::vector<const RegionData*>::const_iterator region_it = | |
381 parent.sub_regions().begin(); | |
382 region_it != parent.sub_regions().end(); ++region_it) { | |
383 const RegionData* region = *region_it; | |
384 DCHECK(region); | |
385 | |
386 key_trie->AddDataForKey(CanonicalizeString(region->key()), region); | |
387 | |
388 if (!region->name().empty()) { | |
389 name_trie->AddDataForKey(CanonicalizeString(region->name()), region); | |
390 } | |
391 | |
392 if (!region->sub_regions().empty()) | |
393 AddTriesForSubRegionsOf(*region, field, field_map); | |
394 } | |
395 } | |
396 | |
397 // Returns a canonical version of the string that can be used for comparing | |
398 // strings regardless of diacritics and capitalization. | |
399 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); | |
400 // CanonicalizeString("Texas") == CanonicalizeString("teXas"); | |
401 // CanonicalizeString("Texas") != CanonicalizeString("California"); | |
402 // | |
403 // The output is not human-readable. | |
404 // CanonicalizeString("Texas") != "Texas"; | |
405 std::string CanonicalizeString(const std::string& original) const { | |
406 icu::UnicodeString icu_str( | |
407 original.c_str(), static_cast<int32_t>(original.length())); | |
408 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); | |
409 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); | |
410 DCHECK(buffer.get()); | |
411 int32_t filled_size = | |
412 collator_->getSortKey(icu_str, buffer.get(), buffer_size); | |
413 DCHECK_EQ(buffer_size, filled_size); | |
414 return std::string(reinterpret_cast<const char*>(buffer.get())); | |
415 } | |
416 | |
417 // A mapping of the country-level RegionData objects to a collection of Trie | |
418 // objects. Tries and maps are owned. RegionDatas are not owned. | |
419 RegionMap tries_; | |
420 | |
421 // Data source for region data. | |
422 RegionDataBuilder region_data_builder_; | |
423 | |
424 // Canonicalizes strings for case and diacritic insensitive search. | |
425 scoped_ptr<icu::Collator> collator_; | |
426 | |
427 DISALLOW_COPY_AND_ASSIGN(TrieCollection); | |
428 }; | |
429 | |
430 InputSuggester::InputSuggester(PreloadSupplier* supplier) | |
431 : trie_collection_(new TrieCollection(supplier)) {} | |
432 | |
433 InputSuggester::~InputSuggester() {} | |
434 | |
435 void InputSuggester::GetSuggestions(const AddressData& user_input, | |
436 AddressField focused_field, | |
437 size_t suggestions_limit, | |
438 std::vector<AddressData>* suggestions) { | |
439 DCHECK(suggestions); | |
440 DCHECK(focused_field == POSTAL_CODE || | |
441 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY)); | |
442 | |
443 // Do not suggest anything if the user input is empty. | |
444 if (user_input.IsFieldEmpty(focused_field)) | |
445 return; | |
446 | |
447 // Build the list of regions that match |user_input| when the user is typing | |
448 // in the |focused_field|. | |
449 const std::vector<SuggestionRegion>& regions = | |
450 trie_collection_->BuildSuggestionRegions(user_input, focused_field); | |
451 | |
452 // Generate suggestions based on the regions. | |
453 for (std::vector<SuggestionRegion>::const_iterator | |
454 region_it = regions.begin(); | |
455 region_it != regions.end(); ++region_it) { | |
456 // Do not add more suggestions than |suggestions_limit|. | |
457 if (suggestions->size() >= suggestions_limit) { | |
458 suggestions->clear(); | |
459 return; | |
460 } | |
461 | |
462 AddressData suggestion; | |
463 suggestion.region_code = user_input.region_code; | |
464 suggestion.postal_code = user_input.postal_code; | |
465 | |
466 // Traverse the tree of regions from the most specific |region| to the | |
467 // country-wide "root" of the tree. Use the region names found at each of | |
468 // the levels of the ruleset tree to build the |suggestion|. | |
469 DCHECK(region_it->region); | |
470 AddressField address_field = region_it->address_field; | |
471 for (const RegionData* suggestion_region = region_it->region; | |
472 suggestion_region->has_parent(); | |
473 suggestion_region = &suggestion_region->parent()) { | |
474 suggestion.SetFieldValue( | |
475 address_field, | |
476 GetValueFromRegion(suggestion_region, region_it->region_fields)); | |
477 address_field = static_cast<AddressField>(address_field - 1); | |
478 } | |
479 | |
480 suggestions->push_back(suggestion); | |
481 } | |
482 } | |
483 | |
484 } // namespace autofill | |
OLD | NEW |