Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(614)

Side by Side Diff: third_party/libaddressinput/chromium/suggestions.cc

Issue 298863012: Use upstream libaddressinput in Chrome. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Self review. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/suggestions.h"
6
7 #include <bitset>
8 #include <map>
9 #include <queue>
10 #include <set>
11 #include <utility>
12
13 #include "base/basictypes.h"
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
16 #include "third_party/icu/source/common/unicode/errorcode.h"
17 #include "third_party/icu/source/common/unicode/locid.h"
18 #include "third_party/icu/source/common/unicode/unistr.h"
19 #include "third_party/icu/source/common/unicode/utypes.h"
20 #include "third_party/icu/source/i18n/unicode/coll.h"
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_da ta.h"
22 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_su pplier.h"
23 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_dat a.h"
24
25 namespace autofill {
26
27 using ::i18n::addressinput::AddressData;
28 using ::i18n::addressinput::AddressField;
29 using ::i18n::addressinput::ADMIN_AREA;
30 using ::i18n::addressinput::COUNTRY;
31 using ::i18n::addressinput::DEPENDENT_LOCALITY;
32 using ::i18n::addressinput::POSTAL_CODE;
33 using ::i18n::addressinput::PreloadSupplier;
34 using ::i18n::addressinput::RegionData;
35 using ::i18n::addressinput::RegionDataBuilder;
36
37 namespace {
38
39 // A type to store a set of pointers to RegionData objects.
40 typedef std::set<const RegionData*> RegionContainer;
41
42 // Collects regions based on whether they have a parent in the given list.
43 class ParentedRegionCollector {
44 public:
45 // Retains a reference to both of the parameters. Does not make a copy of
46 // |parent_regions|. Does not take ownership of |regions_with_parents|. The
47 // |regions_with_parents| parameter should not be NULL.
48 ParentedRegionCollector(const RegionContainer& parent_regions,
49 RegionContainer* regions_with_parents)
50 : parent_regions_(parent_regions),
51 regions_with_parents_(regions_with_parents) {
52 DCHECK(regions_with_parents_);
53 }
54
55 ~ParentedRegionCollector() {}
56
57 // Adds |region_to_test| to the |regions_with_parents_| collection, if the
58 // given region has a parent in |parent_regions_|. The |region_to_test|
59 // parameter should not be NULL.
60 void operator()(const RegionData* region_to_test) {
61 DCHECK(region_to_test);
62 if (parent_regions_.find(&region_to_test->parent()) !=
63 parent_regions_.end()) {
64 regions_with_parents_->insert(region_to_test);
65 }
66 }
67
68 private:
69 const RegionContainer parent_regions_;
70 RegionContainer* regions_with_parents_;
71 };
72
73 } // namespace
74
75 class Suggestions::CanonicalizerImpl {
76 public:
77 CanonicalizerImpl() {
78 UErrorCode error_code = U_ZERO_ERROR;
79 collator_.reset(
80 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
81 DCHECK(U_SUCCESS(error_code));
82 collator_->setStrength(icu::Collator::PRIMARY);
83 }
84
85 ~CanonicalizerImpl() {}
86
87 // Returns a canonical version of the string that can be used for comparing
88 // strings regardless of diacritics and capitalization.
89 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
90 // CanonicalizeString("Texas") == CanonicalizeString("teXas");
91 // CanonicalizeString("Texas") != CanonicalizeString("California");
92 //
93 // The output is not human-readable.
94 // CanonicalizeString("Texas") != "Texas";
95 std::string CanonicalizeString(const std::string& original) {
96 icu::UnicodeString icu_str(
97 original.c_str(), static_cast<int32_t>(original.length()));
98 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
99 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
100 DCHECK(buffer.get());
101 int32_t filled_size =
102 collator_->getSortKey(icu_str, buffer.get(), buffer_size);
103 DCHECK_EQ(buffer_size, filled_size);
104 return std::string(reinterpret_cast<const char*>(buffer.get()));
105 }
106
107 private:
108 scoped_ptr<icu::Collator> collator_;
109
110 DISALLOW_COPY_AND_ASSIGN(CanonicalizerImpl);
111 };
112
113 Suggestions::Suggestions(PreloadSupplier* supplier)
114 : region_data_builder_(supplier),
115 canonicalizer_(new CanonicalizerImpl) {}
116
117 Suggestions::~Suggestions() {
118 // Delete the maps and Trie objects owned by |tries_| field.
119 for (RegionMap::const_iterator region_it = tries_.begin();
please use gerrit instead 2014/06/05 22:22:49 Investigate: can we put RegionMap into its own str
120 region_it != tries_.end(); ++region_it) {
121 FieldMap* field_map = region_it->second;
122 DCHECK(field_map);
123
124 for (FieldMap::const_iterator field_it = field_map->begin();
125 field_it != field_map->end(); ++field_it) {
126 IdMap* id_map = field_it->second;
127 DCHECK(id_map);
128
129 for (IdMap::const_iterator id_it = id_map->begin();
130 id_it != id_map->end(); ++id_it) {
131 RegionDataTrie* trie = id_it->second;
132 delete trie;
133 }
134
135 delete id_map;
136 }
137
138 delete field_map;
139 }
140 }
141
142 void Suggestions::GetSuggestions(const AddressData& user_input,
143 AddressField focused_field,
144 size_t suggestions_limit,
145 std::vector<AddressData>* suggestions) {
146 DCHECK(suggestions);
147 suggestions->clear();
148
149 // Do not suggest anything if the user is typing in the field for which
150 // there's no suggestion data.
151 if (focused_field != POSTAL_CODE &&
please use gerrit instead 2014/06/05 22:22:49 Remove special handling for POSTAL CODE!
please use gerrit instead 2014/06/09 23:28:17 Simply removing handling of postal code will break
152 (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) {
153 return;
154 }
155
156 // Do not suggest anything if the user input is empty.
157 if (user_input.IsFieldEmpty(focused_field))
158 return;
159
160 // Lazily initialize the mapping from fields to Trie objects.
161 const FieldMap& field_map = GetTries(user_input);
162
163 // Do not suggest anything if there's no suggestion data for any fields.
164 if (field_map.empty())
165 return;
166
167 // Do not suggest anything if there's no suggestion data for the focused
168 // field.
169 if (focused_field != POSTAL_CODE &&
170 field_map.find(focused_field) == field_map.end()) {
171 return;
172 }
173
174 // Calculate the most specific field with suggestion data.
175 AddressField deepest_field_with_data = DEPENDENT_LOCALITY;
176 while (field_map.find(deepest_field_with_data) == field_map.end()) {
177 deepest_field_with_data
178 = static_cast<AddressField>(deepest_field_with_data - 1);
179 }
180
181 // Determine the most specific address field that can be suggested.
182 AddressField most_specific_field = focused_field != POSTAL_CODE
183 ? focused_field : DEPENDENT_LOCALITY;
184 if (most_specific_field > deepest_field_with_data) {
185 most_specific_field = deepest_field_with_data;
186 }
187 if (focused_field != POSTAL_CODE) {
188 while (user_input.GetFieldValue(most_specific_field).empty() &&
189 most_specific_field > ADMIN_AREA) {
190 most_specific_field = static_cast<AddressField>(most_specific_field - 1);
191 }
192 }
193
194 // The regions that match the most precise fields in the address. (Locality is
195 // more precise than administrative area, for example.)
196 typedef std::bitset<REGION_IDENTITY_FIELDS_SIZE> MatchingIds;
197 typedef std::map<const RegionData*, MatchingIds> RegionIds;
198 scoped_ptr<RegionIds> precise_regions;
please use gerrit instead 2014/06/05 22:22:49 Use more data structures that hide the complexity
199
200 { // Begin lifetime of |suggestion_regions|.
201 // The regions that match all fields in the address.
202 typedef std::map<AddressField, RegionIds*> FieldRegionIds;
203 FieldRegionIds suggestion_regions;
204 AddressField suggestion_field = ADMIN_AREA;
205
206 { // Begin lifetime of |matching_regions|.
207 // All regions that match at least one field in the address.
208 typedef std::map<RegionIdentityField, RegionContainer*> IdRegionContainer;
209 typedef std::map<AddressField, IdRegionContainer*> FieldRegionContainer;
210 FieldRegionContainer matching_regions;
211
212 // Find all regions that match user input.
213 IdRegionContainer* parent_id_regions = NULL;
214 for (int i = ADMIN_AREA; i <= most_specific_field; ++i) {
215 AddressField address_field = static_cast<AddressField>(i);
216
217 const std::string& canonical_user_input =
218 canonicalizer_->CanonicalizeString(
219 user_input.GetFieldValue(address_field));
220
221 IdRegionContainer* id_regions = new IdRegionContainer;
222 matching_regions.insert(std::make_pair(address_field, id_regions));
223
224 FieldMap::const_iterator field_map_it = field_map.find(address_field);
225 DCHECK(field_map_it != field_map.end());
226 const IdMap* id_map = field_map_it->second;
227 DCHECK(id_map);
228
229 for (IdMap::const_iterator id_map_it = id_map->begin();
230 id_map_it != id_map->end(); ++id_map_it) {
231 RegionIdentityField id = id_map_it->first;
232 DCHECK_LT(id, REGION_IDENTITY_FIELDS_SIZE);
233
234 RegionContainer* regions = new RegionContainer;
235 id_regions->insert(std::make_pair(id, regions));
236
237 const RegionDataTrie* trie = id_map_it->second;
238 DCHECK(trie);
239
240 trie->FindDataForKeyPrefix(canonical_user_input, regions);
241
242 // Filter out the regions whose parents do not match the user input.
243 if (address_field > ADMIN_AREA) {
244 DCHECK(parent_id_regions);
245
246 IdRegionContainer::const_iterator parent_id_regions_it =
247 parent_id_regions->find(id);
248 DCHECK(parent_id_regions_it != parent_id_regions->end());
249
250 RegionContainer* parent_regions = parent_id_regions_it->second;
251 DCHECK(parent_regions);
252
253 RegionContainer regions_with_parents;
254 std::for_each(regions->begin(), regions->end(),
255 ParentedRegionCollector(
256 *parent_regions, &regions_with_parents));
257 regions->swap(regions_with_parents);
258 }
259 }
260
261 parent_id_regions = id_regions;
262 }
263
264 // Collect the RegionIdentityField into a single bitmask per RegionData.
265 for (FieldRegionContainer::const_iterator field_it =
266 matching_regions.begin();
267 field_it != matching_regions.end(); ++field_it) {
268 AddressField address_field = field_it->first;
269 const IdRegionContainer* id_regions = field_it->second;
270 DCHECK(id_regions);
271
272 for (IdRegionContainer::const_iterator id_it = id_regions->begin();
273 id_it != id_regions->end(); ++id_it) {
274 RegionIdentityField id = id_it->first;
275 const RegionContainer* regions = id_it->second;
276 DCHECK(regions);
277
278 for (RegionContainer::const_iterator region_it = regions->begin();
279 region_it != regions->end(); ++region_it) {
280 const RegionData* region = *region_it;
281 DCHECK(region);
282
283 FieldRegionIds::iterator field_region_id_it =
284 suggestion_regions.find(address_field);
285 if (field_region_id_it == suggestion_regions.end()) {
286 field_region_id_it = suggestion_regions.insert(
287 std::make_pair(address_field, new RegionIds)).first;
288 }
289 DCHECK(field_region_id_it->second);
290
291 RegionIds& region_ids = *(field_region_id_it->second);
292 region_ids[region].set(id);
293
294 if (suggestion_field < address_field)
295 suggestion_field = address_field;
296 }
297 }
298 }
299
300 // Clean up |matching_regions|.
301 for (FieldRegionContainer::const_iterator field_it =
302 matching_regions.begin();
303 field_it != matching_regions.end(); ++field_it) {
304 IdRegionContainer* id_regions = field_it->second;
305 DCHECK(id_regions);
306 for (IdRegionContainer::const_iterator id_it = id_regions->begin();
307 id_it != id_regions->end(); ++id_it) {
308 RegionContainer* regions = id_it->second;
309 delete regions;
310 }
311 delete id_regions;
312 }
313 } // End lifetime of |matching_regions|.
314
315 // Filter out the less precise regions.
316 FieldRegionIds::const_iterator suggestion_regions_it =
317 suggestion_regions.find(suggestion_field);
318 if (suggestion_regions_it == suggestion_regions.end())
319 return;
320
321 precise_regions.reset(suggestion_regions_it->second);
322 DCHECK(precise_regions);
323
324 for (FieldRegionIds::iterator field_region_id_it =
325 suggestion_regions.begin();
326 field_region_id_it != suggestion_regions.end();
327 ++field_region_id_it) {
328 if (precise_regions.get() != field_region_id_it->second)
329 delete field_region_id_it->second;
330 }
331 } // End lifetime of |suggestion_regions|.
332
333 // Generate suggestions based on the regions. Use a RegionIdentityField from
334 // the bitset to generate address field values.
335 DCHECK(precise_regions);
336 for (RegionIds::const_iterator suggestion_it = precise_regions->begin();
337 suggestion_it != precise_regions->end(); ++suggestion_it) {
338 // Do not add more suggestions than |suggestions_limit|.
339 if (suggestions->size() >= suggestions_limit) {
340 suggestions->clear();
341 return;
342 }
343
344 const MatchingIds& matching_ids = suggestion_it->second;
345 RegionIdentityField id = KEY;
346 if (matching_ids.test(KEY)) {
347 id = KEY;
348 } else if (matching_ids.test(NAME)) {
349 id = NAME;
350 } else {
351 NOTREACHED();
352 }
353
354 AddressData suggestion;
355 suggestion.region_code = user_input.region_code;
356 suggestion.postal_code = user_input.postal_code;
357
358 // Traverse the tree of regions from the most specific |region| to the
359 // country-wide "root" of the tree. Use the region names found at each of
360 // the levels of the ruleset tree to build the |suggestion|.
361 std::vector<const RegionData*> suggestion_fields;
362 DCHECK(suggestion_it->first);
363 for (const RegionData* suggestion_region = suggestion_it->first;
364 suggestion_region->has_parent();
365 suggestion_region = &suggestion_region->parent()) {
366 suggestion_fields.insert(suggestion_fields.begin(), suggestion_region);
367 }
368 AddressField address_field = ADMIN_AREA;
369 for (std::vector<const RegionData*>::const_iterator field_it =
370 suggestion_fields.begin();
371 field_it != suggestion_fields.end(); ++field_it) {
372 suggestion.SetFieldValue(
373 address_field,
374 id == KEY ? (*field_it)->key() : (*field_it)->name());
375 address_field = static_cast<AddressField>(address_field + 1);
376 }
377
378 suggestions->push_back(suggestion);
379 }
380 }
381
382 void Suggestions::AddTriesForSubRegionsOf(const RegionData& parent,
383 AddressField parent_field,
384 FieldMap* field_map) {
385 DCHECK(!parent.sub_regions().empty());
386 DCHECK(field_map);
387
388 AddressField field = static_cast<AddressField>(parent_field + 1);
389 DCHECK_GE(field, ADMIN_AREA);
390 DCHECK_LE(field, DEPENDENT_LOCALITY);
391
392 FieldMap::iterator field_it = field_map->find(field);
393 if (field_it == field_map->end())
394 field_it = field_map->insert(std::make_pair(field, new IdMap)).first;
395
396 IdMap* id_map = field_it->second;
397 DCHECK(id_map);
398
399 IdMap::iterator key_it = id_map->find(KEY);
400 if (key_it == id_map->end())
401 key_it = id_map->insert(std::make_pair(KEY, new RegionDataTrie)).first;
402 RegionDataTrie* key_trie = key_it->second;
403 DCHECK(key_trie);
404
405 IdMap::iterator name_it = id_map->find(NAME);
406 if (name_it == id_map->end())
407 name_it = id_map->insert(std::make_pair(NAME, new RegionDataTrie)).first;
408 RegionDataTrie* name_trie = name_it->second;
409 DCHECK(name_trie);
410
411 for (std::vector<const RegionData*>::const_iterator region_it =
412 parent.sub_regions().begin();
413 region_it != parent.sub_regions().end(); ++region_it) {
414 const RegionData* region = *region_it;
415 DCHECK(region);
416
417 key_trie->AddDataForKey(
418 canonicalizer_->CanonicalizeString(region->key()), region);
419
420 if (!region->name().empty()) {
421 name_trie->AddDataForKey(
422 canonicalizer_->CanonicalizeString(region->name()), region);
423 }
424
425 if (!region->sub_regions().empty())
426 AddTriesForSubRegionsOf(*region, field, field_map);
427 }
428 }
429
430 const Suggestions::FieldMap& Suggestions::GetTries(
431 const AddressData& user_input) {
432 std::string unused_best_region_tree_language_tag;
433 const RegionData& region_data = region_data_builder_.Build(
434 user_input.region_code, user_input.language_code,
435 &unused_best_region_tree_language_tag);
436
437 RegionMap::iterator tries_it = tries_.find(&region_data);
438 if (tries_it == tries_.end()) {
439 tries_it = tries_.insert(std::make_pair(&region_data, new FieldMap)).first;
440 if (!region_data.sub_regions().empty())
441 AddTriesForSubRegionsOf(region_data, COUNTRY, tries_it->second);
442 }
443
444 return *(tries_it->second);
445 }
446
447 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698