OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "third_party/libaddressinput/chromium/input_suggester.h" | |
6 | |
7 #include <map> | |
8 #include <set> | |
9 #include <utility> | |
10 | |
11 #include "base/basictypes.h" | |
12 #include "base/logging.h" | |
13 #include "base/memory/scoped_ptr.h" | |
14 #include "base/stl_util.h" | |
15 #include "third_party/icu/source/i18n/unicode/coll.h" | |
16 #include "third_party/libaddressinput/chromium/trie.h" | |
17 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h" | |
18 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h" | |
19 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h" | |
20 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h" | |
21 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data_builder.h" | |
22 | |
23 namespace autofill { | |
24 | |
25 using ::i18n::addressinput::AddressData; | |
26 using ::i18n::addressinput::AddressField; | |
27 using ::i18n::addressinput::BuildCallback; | |
28 using ::i18n::addressinput::FieldProblemMap; | |
29 using ::i18n::addressinput::PreloadSupplier; | |
30 using ::i18n::addressinput::RegionData; | |
31 using ::i18n::addressinput::RegionDataBuilder; | |
32 | |
33 using ::i18n::addressinput::ADMIN_AREA; | |
34 using ::i18n::addressinput::COUNTRY; | |
35 using ::i18n::addressinput::DEPENDENT_LOCALITY; | |
36 using ::i18n::addressinput::LOCALITY; | |
37 using ::i18n::addressinput::POSTAL_CODE; | |
38 | |
39 using ::i18n::addressinput::INVALID_FORMAT; | |
40 using ::i18n::addressinput::MISMATCHING_VALUE; | |
41 | |
42 namespace { | |
43 | |
44 // A region and its metadata useful for constructing a suggestion. | |
45 struct Suggestion { | |
46 public: | |
47 // Builds a suggestion of |region_to_suggest|. Does not take ownership of | |
48 // |region_to_suggest|, which should not be NULL. | |
49 Suggestion(const RegionData* region_to_suggest, | |
50 AddressField matching_address_field, | |
51 bool region_key_matches) | |
52 : region_to_suggest(region_to_suggest), | |
53 matching_address_field(matching_address_field), | |
54 region_key_matches(region_key_matches) { | |
55 DCHECK(region_to_suggest); | |
56 } | |
57 | |
58 ~Suggestion() {} | |
59 | |
60 // The region that should be suggested. For example, if the region is ("CA", | |
61 // "California"), then either "CA" or "California" should be suggested. | |
62 const RegionData* region_to_suggest; | |
63 | |
64 // The field in the address for which the suggestion should be made. For | |
65 // example, ADMIN_AREA in US means the suggestion should be made for the field | |
66 // labeled "State". | |
67 AddressField matching_address_field; | |
68 | |
69 // True if the key of the region matches user input (the name may or may not | |
70 // match). "CA" should be suggested for a ("CA", "California") region. | |
71 // | |
72 // False if only the name of the region matches user input (key does not | |
73 // match). "California" should be suggested fir a ("CA", "California") region. | |
74 bool region_key_matches; | |
75 }; | |
76 | |
77 // Suggestions for an address. Contains lists of suggestions for administrative | |
78 // area, locality, and dependent locality fields of an address. | |
79 class AddressSuggestions { | |
80 public: | |
81 AddressSuggestions() { | |
82 for (int i = ADMIN_AREA; i <= LOCALITY; ++i) { | |
83 regions_match_input_[static_cast<AddressField>(i)] = | |
84 new RegionsMatchInput; | |
85 } | |
86 } | |
87 | |
88 ~AddressSuggestions() { STLDeleteValues(®ions_match_input_); } | |
89 | |
90 // Marks all regions at |address_field| level as matching user input. | |
91 void AllRegionsMatchForField(AddressField address_field) { | |
92 all_regions_match_input_.insert(address_field); | |
93 } | |
94 | |
95 // Marks given regions at |address_field| level as matching user input. The | |
96 // |regions_match_key| parameter contains the regions that match user input by | |
97 // their keys. The |regions_match_name| parameter contains the regions that | |
98 // match user input by their names. | |
99 // | |
100 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or | |
101 // DEPENDENT_LOCALITY. | |
102 bool AddRegions(AddressField address_field, | |
103 const std::set<const RegionData*>& regions_match_key, | |
104 const std::set<const RegionData*>& regions_match_name) { | |
105 DCHECK(address_field >= ADMIN_AREA); | |
106 DCHECK(address_field <= DEPENDENT_LOCALITY); | |
107 | |
108 AddressField parent_address_field = | |
109 static_cast<AddressField>(address_field - 1); | |
110 | |
111 bool all_parents_match = | |
112 parent_address_field == COUNTRY || | |
113 all_regions_match_input_.find(parent_address_field) != | |
114 all_regions_match_input_.end(); | |
115 | |
116 // Cannot build |address_field| level suggestions if there are no matches in | |
117 // |parent_address_field| level regions. | |
118 const RegionsMatchInput* parents = NULL; | |
119 if (address_field > ADMIN_AREA && !all_parents_match) { | |
120 parents = regions_match_input_[parent_address_field]; | |
121 if (parents->keys.empty() && parents->names.empty()) | |
122 return false; | |
123 } | |
124 | |
125 RegionsMatchInput* regions = NULL; | |
126 if (address_field < DEPENDENT_LOCALITY) | |
127 regions = regions_match_input_[address_field]; | |
128 | |
129 std::vector<Suggestion>& suggestions = suggestions_[address_field]; | |
130 bool added_suggestions = false; | |
131 | |
132 // Iterate over both |regions_match_key| and |regions_match_name| and build | |
133 // Suggestion objects based on the given RegionData objects. Advance either | |
134 // one iterator at a time (if they point to different data) or both | |
135 // iterators at once (if they point to the same data). | |
136 for (std::set<const RegionData*>::const_iterator | |
137 key_it = regions_match_key.begin(), | |
138 name_it = regions_match_name.begin(); | |
139 key_it != regions_match_key.end() || | |
140 name_it != regions_match_name.end();) { | |
141 const RegionData* key_region = | |
142 key_it != regions_match_key.end() ? *key_it : NULL; | |
143 const RegionData* name_region = | |
144 name_it != regions_match_name.end() ? *name_it : NULL; | |
145 | |
146 // Regions that do not have a parent that also matches input will not | |
147 // become a suggestion. | |
148 bool key_region_has_parent = | |
149 all_parents_match || | |
150 (parents && !parents->keys.empty() && key_region && | |
151 parents->keys.find(&key_region->parent()) != parents->keys.end()); | |
152 bool name_region_has_parent = | |
153 all_parents_match || | |
154 (parents && !parents->names.empty() && name_region && | |
155 parents->names.find(&name_region->parent()) != parents->names.end()); | |
156 | |
157 if (name_region && (!key_region || name_region < key_region)) { | |
158 if (name_region_has_parent) { | |
159 suggestions.push_back(Suggestion(name_region, address_field, false)); | |
160 added_suggestions = true; | |
161 if (regions) | |
162 regions->names.insert(name_region); | |
163 } | |
164 | |
165 ++name_it; | |
166 } else if (key_region && (!name_region || key_region < name_region)) { | |
167 if (key_region_has_parent) { | |
168 suggestions.push_back(Suggestion(key_region, address_field, true)); | |
169 added_suggestions = true; | |
170 if (regions) | |
171 regions->keys.insert(key_region); | |
172 } | |
173 | |
174 ++key_it; | |
175 } else { | |
176 if (key_region_has_parent) { | |
177 suggestions.push_back(Suggestion(key_region, address_field, true)); | |
178 added_suggestions = true; | |
179 if (regions) { | |
180 regions->keys.insert(key_region); | |
181 regions->names.insert(name_region); | |
182 } | |
183 } | |
184 | |
185 ++key_it; | |
186 ++name_it; | |
187 } | |
188 } | |
189 | |
190 return added_suggestions; | |
191 } | |
192 | |
193 // Swaps the suggestions for the smallest sub-region into |suggestions|. This | |
194 // object is not usable after this call due to using the swap() operation. | |
Evan Stade
2014/06/27 01:26:58
nit: s/this object/|this|
please use gerrit instead
2014/06/27 08:43:38
Done.
| |
195 // | |
196 // The |suggestions| parameter should not be NULL. | |
197 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) { | |
198 DCHECK(suggestions); | |
199 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) { | |
200 std::vector<Suggestion>& result = | |
201 suggestions_[static_cast<AddressField>(i)]; | |
202 if (!result.empty()) { | |
203 result.swap(*suggestions); | |
Evan Stade
2014/06/27 01:26:58
nit: suggestions->swap(result), imo, is easier to
please use gerrit instead
2014/06/27 08:43:38
Done.
| |
204 return; | |
205 } | |
206 } | |
207 } | |
208 | |
209 private: | |
210 // The sets of non-owned regions used for looking up regions that match user | |
211 // input by keys and names. | |
212 struct RegionsMatchInput { | |
213 std::set<const RegionData*> keys; | |
214 std::set<const RegionData*> names; | |
215 }; | |
216 | |
217 // The owned sets of regions that match user input at ADMIN_AREA and LOCALITY | |
218 // levels. | |
219 std::map<AddressField, RegionsMatchInput*> regions_match_input_; | |
220 | |
221 // The set of fields for which all regions match user input. Used to avoid | |
222 // storing a long list in |regions_match_input_| and later looking it up | |
223 // there. | |
224 std::set<AddressField> all_regions_match_input_; | |
225 | |
226 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels. | |
227 std::map<AddressField, std::vector<Suggestion> > suggestions_; | |
228 | |
229 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions); | |
230 }; | |
231 | |
232 } // namespace | |
233 | |
234 // Canonicalizes strings for case and diacritic insensitive comparison. | |
235 class StringCanonicalizer { | |
236 public: | |
237 // Initializes the canonicalizer. This is slow, so avoid calling it more often | |
238 // than necessary. | |
239 StringCanonicalizer() : buffer_size_(0) { | |
Evan Stade
2014/06/27 01:26:58
should be initialized to something greater than 0
please use gerrit instead
2014/06/27 08:43:38
Done.
| |
240 UErrorCode error_code = U_ZERO_ERROR; | |
241 collator_.reset( | |
242 icu::Collator::createInstance(icu::Locale::getRoot(), error_code)); | |
243 DCHECK(U_SUCCESS(error_code)); | |
244 collator_->setStrength(icu::Collator::PRIMARY); | |
245 } | |
246 | |
247 ~StringCanonicalizer() {} | |
248 | |
249 // Returns a canonical version of the string that can be used for comparing | |
250 // strings regardless of diacritics and capitalization. | |
251 // Canonicalize("Texas") == Canonicalize("T\u00E9xas"); | |
252 // Canonicalize("Texas") == Canonicalize("teXas"); | |
253 // Canonicalize("Texas") != Canonicalize("California"); | |
254 // | |
255 // The output is not human-readable. | |
256 // Canonicalize("Texas") != "Texas"; | |
257 std::string Canonicalize(const std::string& original) const { | |
258 DCHECK(!original.empty()); | |
259 icu::UnicodeString icu_str(original.c_str(), original.length()); | |
260 int32_t sort_key_size = | |
261 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_); | |
262 DCHECK_LT(0, sort_key_size); | |
263 if (sort_key_size > buffer_size_) { | |
264 buffer_size_ = sort_key_size * 2; | |
265 buffer_.reset(new uint8_t[buffer_size_]); | |
266 sort_key_size = | |
267 collator_->getSortKey(icu_str, buffer_.get(), buffer_size_); | |
268 DCHECK_LT(0, sort_key_size); | |
269 DCHECK_GT(buffer_size_, sort_key_size); | |
270 } | |
271 return std::string(reinterpret_cast<const char*>(buffer_.get()), | |
272 sort_key_size - 1); | |
273 } | |
274 | |
275 private: | |
276 mutable int32_t buffer_size_; | |
277 mutable scoped_ptr<uint8_t[]> buffer_; | |
278 scoped_ptr<icu::Collator> collator_; | |
279 | |
280 DISALLOW_COPY_AND_ASSIGN(StringCanonicalizer); | |
281 }; | |
282 | |
283 // All sub-regions of a COUNTRY level region, organized into tries for lookup by | |
284 // region name or key. | |
285 class InputSuggester::SubRegionData { | |
286 public: | |
287 // Adds the sub-regions of |country_region| into tries. Uses | |
288 // |shared_canonicalizer| for case and diacritic insensitive lookup of the | |
289 // sub-regions. | |
290 SubRegionData(const RegionData& country_region, | |
291 const StringCanonicalizer& shared_canonicalizer) | |
292 : canonicalizer_(shared_canonicalizer), smallest_region_size_(COUNTRY) { | |
293 DCHECK(!country_region.has_parent()); | |
294 | |
295 for (int i = ADMIN_AREA; i <= DEPENDENT_LOCALITY; ++i) | |
296 field_tries_[static_cast<AddressField>(i)] = new FieldTries; | |
297 | |
298 if (!country_region.sub_regions().empty()) | |
299 AddSubRegionsOf(country_region, COUNTRY); | |
300 } | |
301 | |
302 ~SubRegionData() { STLDeleteValues(&field_tries_); } | |
303 | |
304 // Adds the suggestions for |user_input| into |suggestions| when user is | |
305 // typing in |focused_field|. | |
306 void BuildSuggestions(const AddressData& user_input, | |
307 AddressField focused_field, | |
308 std::vector<Suggestion>* suggestions) const { | |
309 // Do not suggest anything if there's no suggestion data for the focused | |
310 // field. | |
311 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field) | |
312 return; | |
313 | |
314 // Non-owned regions that match a field value by region key. | |
315 std::set<const RegionData*> regions_match_key; | |
316 | |
317 // Non-owned regions that match a field value by region name. | |
318 std::set<const RegionData*> regions_match_name; | |
319 | |
320 AddressSuggestions address_suggestions; | |
321 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY; | |
322 ++i) { | |
323 AddressField address_field = static_cast<AddressField>(i); | |
324 AddressField parent_address_field = static_cast<AddressField>(i - 1); | |
325 | |
326 const std::string& field_value = user_input.GetFieldValue(address_field); | |
327 const std::string& parent_field_value = | |
328 user_input.GetFieldValue(parent_address_field); | |
329 | |
330 if (field_value.empty() && | |
331 (address_field == ADMIN_AREA || parent_field_value.empty())) { | |
332 address_suggestions.AllRegionsMatchForField(address_field); | |
333 continue; | |
334 } | |
335 | |
336 regions_match_key.clear(); | |
337 regions_match_name.clear(); | |
338 | |
339 const std::string& canonical_field_value = | |
340 field_value.empty() ? field_value | |
341 : canonicalizer_.Canonicalize(field_value); | |
342 const FieldTries* field_tries = field_tries_.find(address_field)->second; | |
343 field_tries->keys.FindDataForKeyPrefix(canonical_field_value, | |
344 ®ions_match_key); | |
345 field_tries->names.FindDataForKeyPrefix(canonical_field_value, | |
346 ®ions_match_name); | |
347 | |
348 bool added_suggestions = address_suggestions.AddRegions( | |
349 address_field, regions_match_key, regions_match_name); | |
350 | |
351 // Do not suggest anything if the focused field does not have suggestions. | |
352 if (address_field == focused_field && !added_suggestions) | |
353 return; | |
354 } | |
355 | |
356 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions); | |
357 } | |
358 | |
359 private: | |
360 // The tries to lookup regions for a specific field by keys and names. For | |
361 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK", | |
362 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The | |
363 // struct is uncopyable due to Trie objects being uncopyable. | |
364 struct FieldTries { | |
365 Trie<const RegionData*> keys; | |
366 Trie<const RegionData*> names; | |
367 }; | |
368 | |
369 // Adds the sub-regions of |parent_region| into tries. | |
370 void AddSubRegionsOf(const RegionData& parent_region, | |
371 AddressField parent_field) { | |
372 DCHECK(!parent_region.sub_regions().empty()); | |
373 | |
374 AddressField address_field = static_cast<AddressField>(parent_field + 1); | |
375 DCHECK(address_field >= ADMIN_AREA); | |
376 DCHECK(address_field <= DEPENDENT_LOCALITY); | |
377 | |
378 FieldTries* field_tries = field_tries_[address_field]; | |
379 for (std::vector<const RegionData*>::const_iterator it = | |
380 parent_region.sub_regions().begin(); | |
381 it != parent_region.sub_regions().end(); | |
382 ++it) { | |
383 const RegionData* region = *it; | |
384 DCHECK(region); | |
385 | |
386 field_tries->keys.AddDataForKey( | |
387 canonicalizer_.Canonicalize(region->key()), region); | |
388 field_tries->names.AddDataForKey( | |
389 canonicalizer_.Canonicalize(region->name()), region); | |
390 | |
391 if (smallest_region_size_ < address_field) | |
392 smallest_region_size_ = address_field; | |
393 | |
394 if (!region->sub_regions().empty()) | |
395 AddSubRegionsOf(*region, address_field); | |
396 } | |
397 } | |
398 | |
399 // Owned tries to lookup regions for ADMIN_AREA, LOCALITY, and | |
400 // DEPENDENT_LOCALITY. | |
401 std::map<AddressField, FieldTries*> field_tries_; | |
402 | |
403 // The smallest size of a sub-region that has data. For example, this is | |
404 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN. | |
405 AddressField smallest_region_size_; | |
406 | |
407 // A shared instance of string canonicalizer for case and diacritic comparison | |
408 // of region keys and names. | |
409 const StringCanonicalizer& canonicalizer_; | |
410 | |
411 DISALLOW_COPY_AND_ASSIGN(SubRegionData); | |
412 }; | |
413 | |
414 InputSuggester::InputSuggester(PreloadSupplier* supplier) | |
415 : region_data_builder_(supplier), | |
416 input_helper_(supplier), | |
417 validator_(supplier), | |
418 validated_(BuildCallback(this, &InputSuggester::Validated)) {} | |
419 | |
420 InputSuggester::~InputSuggester() { | |
421 STLDeleteValues(&sub_regions_); | |
422 } | |
423 | |
424 void InputSuggester::GetSuggestions(const AddressData& user_input, | |
425 AddressField focused_field, | |
426 size_t suggestions_limit, | |
427 std::vector<AddressData>* suggestions) { | |
428 DCHECK(suggestions); | |
429 DCHECK(focused_field == POSTAL_CODE || | |
430 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY)); | |
431 | |
432 AddressData address_copy = user_input; | |
433 | |
434 // Do not suggest anything if the user input is empty. | |
435 if (address_copy.IsFieldEmpty(focused_field)) | |
436 return; | |
437 | |
438 if (focused_field == POSTAL_CODE) { | |
439 // Do not suggest anything if the user is typing an invalid postal code. | |
440 FieldProblemMap problems; | |
441 FieldProblemMap filter; | |
442 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT)); | |
443 validator_.Validate(address_copy, | |
444 true, // Allow postal office boxes. | |
445 false, // Do not require recipient name. | |
446 &filter, | |
447 &problems, | |
448 *validated_); | |
449 if (!problems.empty()) | |
450 return; | |
451 | |
452 // Fill in the sub-regions based on the postal code. | |
453 input_helper_.FillAddress(&address_copy); | |
454 } | |
455 | |
456 // Lazily initialize the mapping from COUNTRY level regions to all of their | |
457 // sub-regions with metadata for generating suggestions. | |
458 std::string unused_best_language; | |
459 const RegionData& region_data = | |
460 region_data_builder_.Build(address_copy.region_code, | |
461 address_copy.language_code, | |
462 &unused_best_language); | |
463 std::map<const RegionData*, const SubRegionData*>::iterator | |
464 sub_region_data_it = sub_regions_.find(®ion_data); | |
465 if (sub_region_data_it == sub_regions_.end()) { | |
466 if (!canonicalizer_) { | |
Evan Stade
2014/06/27 01:26:58
nit: no curlies
please use gerrit instead
2014/06/27 08:43:38
Done.
| |
467 canonicalizer_.reset(new StringCanonicalizer); | |
468 } | |
469 sub_region_data_it = | |
470 sub_regions_.insert(std::make_pair(®ion_data, | |
471 new SubRegionData(region_data, | |
472 *canonicalizer_))) | |
473 .first; | |
474 } | |
475 DCHECK(sub_region_data_it->second); | |
476 | |
477 // Build the list of regions that match |address_copy| when the user is typing | |
478 // in the |focused_field|. | |
479 std::vector<Suggestion> suggested_regions; | |
480 sub_region_data_it->second->BuildSuggestions( | |
481 address_copy, focused_field, &suggested_regions); | |
482 | |
483 FieldProblemMap problems; | |
484 FieldProblemMap filter; | |
485 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE)); | |
486 | |
487 // Generate suggestions based on the regions. | |
488 for (std::vector<Suggestion>::const_iterator suggested_region_it = | |
489 suggested_regions.begin(); | |
490 suggested_region_it != suggested_regions.end(); | |
491 ++suggested_region_it) { | |
492 AddressData address; | |
493 address.region_code = address_copy.region_code; | |
494 address.postal_code = address_copy.postal_code; | |
495 | |
496 // Traverse the tree of regions from the smallest |region_to_suggest| to the | |
497 // country-wide "root" of the tree. Use the region names or keys found at | |
498 // each of the levels of the tree to build the |address| to suggest. | |
499 AddressField address_field = suggested_region_it->matching_address_field; | |
500 for (const RegionData* region = suggested_region_it->region_to_suggest; | |
501 region->has_parent(); | |
502 region = ®ion->parent()) { | |
503 address.SetFieldValue(address_field, | |
504 suggested_region_it->region_key_matches | |
505 ? region->key() | |
506 : region->name()); | |
507 address_field = static_cast<AddressField>(address_field - 1); | |
508 } | |
509 | |
510 // Do not suggest an address with a mismatching postal code. | |
511 problems.clear(); | |
512 validator_.Validate(address_copy, | |
Evan Stade
2014/06/27 01:26:58
these two short circuits (continue and return) ---
please use gerrit instead
2014/06/27 08:43:38
THank you for bring this to my attention! I notice
| |
513 true, // Allow postal office boxes. | |
514 false, // Do not require recipient name. | |
515 &filter, | |
516 &problems, | |
517 *validated_); | |
518 if (!problems.empty()) | |
519 continue; | |
520 | |
521 // Do not add more suggestions than |suggestions_limit|. | |
522 if (suggestions->size() >= suggestions_limit) { | |
523 suggestions->clear(); | |
524 return; | |
525 } | |
526 | |
527 suggestions->push_back(address); | |
528 } | |
529 } | |
530 | |
531 void InputSuggester::Validated(bool success, | |
532 const AddressData&, | |
533 const FieldProblemMap&) { | |
534 DCHECK(success); | |
535 } | |
536 | |
537 } // namespace autofill | |
OLD | NEW |