Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(504)

Side by Side Diff: third_party/libaddressinput/chromium/cpp/src/address_validator.cc

Issue 389863002: Remove Chrome's own version of libaddressinput in favor of the upstream. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (C) 2013 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libaddressinput/address_validator.h>
16
17 #include <libaddressinput/address_data.h>
18 #include <libaddressinput/downloader.h>
19 #include <libaddressinput/load_rules_delegate.h>
20 #include <libaddressinput/storage.h>
21 #include <libaddressinput/util/basictypes.h>
22 #include <libaddressinput/util/scoped_ptr.h>
23
24 #include <algorithm>
25 #include <bitset>
26 #include <cassert>
27 #include <cstddef>
28 #include <map>
29 #include <set>
30 #include <string>
31 #include <utility>
32 #include <vector>
33
34 #include <re2/re2.h>
35
36 #include "country_rules_aggregator.h"
37 #include "grit.h"
38 #include "grit/libaddressinput_strings.h"
39 #include "region_data_constants.h"
40 #include "retriever.h"
41 #include "rule.h"
42 #include "ruleset.h"
43 #include "util/stl_util.h"
44 #include "util/string_util.h"
45
46 namespace i18n {
47 namespace addressinput {
48
49 namespace {
50
51 // A type to store a list of pointers to Ruleset objects.
52 typedef std::set<const Ruleset*> Rulesets;
53
54 // A type to map the field in a rule to rulesets.
55 typedef std::map<Rule::IdentityField, Rulesets> IdentityFieldRulesets;
56
57 // A type to map the field in an address to rulesets.
58 typedef std::map<AddressField, IdentityFieldRulesets> AddressFieldRulesets;
59
60 // A set of Rule::IdentityField values that match user input.
61 typedef std::bitset<Rule::IDENTITY_FIELDS_SIZE> MatchingRuleFields;
62
63 // Returns true if |prefix_regex| matches a prefix of |value|. For example,
64 // "(90|81)" matches a prefix of "90291".
65 bool ValueMatchesPrefixRegex(const std::string& value,
66 const std::string& prefix_regex) {
67 return RE2::FullMatch(value, "^(" + prefix_regex + ").*");
68 }
69
70 // Returns true if the filter is empty (all problems allowed) or contains the
71 // |field|->|problem| mapping (explicitly allowed).
72 bool FilterAllows(const AddressProblemFilter& filter,
73 AddressField field,
74 AddressProblem::Type problem) {
75 if (filter.empty()) {
76 return true;
77 }
78
79 for (AddressProblemFilter::const_iterator it = filter.begin();
80 it != filter.end(); ++it) {
81 if (it->first == field && it->second == problem) {
82 return true;
83 }
84 }
85
86 return false;
87 }
88
// Reports whether |street_address| carries no content: either it has no lines
// at all, or every one of its lines is an empty string.
bool IsEmptyStreetAddress(const std::vector<std::string>& street_address) {
  for (size_t line = 0; line < street_address.size(); ++line) {
    if (!street_address[line].empty()) {
      return false;
    }
  }
  return true;
}
100
101 // Returns the ID of the string that should be displayed when the given field
102 // is invalid in the context of |country_rule|.
103 int GetInvalidFieldMessageId(const Rule& country_rule, AddressField field) {
104 switch (field) {
105 case LOCALITY:
106 return IDS_LIBADDRESSINPUT_I18N_INVALID_LOCALITY_LABEL;
107 case DEPENDENT_LOCALITY:
108 return IDS_LIBADDRESSINPUT_I18N_INVALID_DEPENDENT_LOCALITY_LABEL;
109
110 case ADMIN_AREA: {
111 const std::string& admin_area_name_type =
112 country_rule.GetAdminAreaNameType();
113 if (admin_area_name_type == "area") {
114 return IDS_LIBADDRESSINPUT_I18N_INVALID_AREA;
115 }
116 if (admin_area_name_type == "county") {
117 return IDS_LIBADDRESSINPUT_I18N_INVALID_COUNTY_LABEL;
118 }
119 if (admin_area_name_type == "department") {
120 return IDS_LIBADDRESSINPUT_I18N_INVALID_DEPARTMENT;
121 }
122 if (admin_area_name_type == "district") {
123 return IDS_LIBADDRESSINPUT_I18N_INVALID_DEPENDENT_LOCALITY_LABEL;
124 }
125 if (admin_area_name_type == "do_si") {
126 return IDS_LIBADDRESSINPUT_I18N_INVALID_DO_SI;
127 }
128 if (admin_area_name_type == "emirate") {
129 return IDS_LIBADDRESSINPUT_I18N_INVALID_EMIRATE;
130 }
131 if (admin_area_name_type == "island") {
132 return IDS_LIBADDRESSINPUT_I18N_INVALID_ISLAND;
133 }
134 if (admin_area_name_type == "parish") {
135 return IDS_LIBADDRESSINPUT_I18N_INVALID_PARISH;
136 }
137 if (admin_area_name_type == "prefecture") {
138 return IDS_LIBADDRESSINPUT_I18N_INVALID_PREFECTURE;
139 }
140 if (admin_area_name_type == "province") {
141 return IDS_LIBADDRESSINPUT_I18N_INVALID_PROVINCE;
142 }
143 if (admin_area_name_type == "state") {
144 return IDS_LIBADDRESSINPUT_I18N_INVALID_STATE_LABEL;
145 }
146 return INVALID_MESSAGE_ID;
147 }
148
149 case POSTAL_CODE: {
150 const std::string& postal_code_name_type =
151 country_rule.GetPostalCodeNameType();
152 if (postal_code_name_type == "postal") {
153 return IDS_LIBADDRESSINPUT_I18N_INVALID_POSTAL_CODE_LABEL;
154 }
155 if (postal_code_name_type == "zip") {
156 return IDS_LIBADDRESSINPUT_I18N_INVALID_ZIP_CODE_LABEL;
157 }
158 return INVALID_MESSAGE_ID;
159 }
160
161 default:
162 return IDS_LIBADDRESSINPUT_I18N_INVALID_ENTRY;
163 }
164 }
165
166 // Collects rulesets based on whether they have a parent in the given list.
167 class ParentedRulesetCollector {
168 public:
169 // Retains a reference to both of the parameters. Does not make a copy of
170 // |parent_rulesets|. Does not take ownership of |rulesets_with_parents|. The
171 // |rulesets_with_parents| parameter should not be NULL.
172 ParentedRulesetCollector(const Rulesets& parent_rulesets,
173 Rulesets* rulesets_with_parents)
174 : parent_rulesets_(parent_rulesets),
175 rulesets_with_parents_(rulesets_with_parents) {
176 assert(rulesets_with_parents_ != NULL);
177 }
178
179 ~ParentedRulesetCollector() {}
180
181 // Adds |ruleset_to_test| to the |rulesets_with_parents_| collection, if the
182 // given ruleset has a parent in |parent_rulesets_|. The |ruleset_to_test|
183 // parameter should not be NULL.
184 void operator()(const Ruleset* ruleset_to_test) {
185 assert(ruleset_to_test != NULL);
186 if (parent_rulesets_.find(ruleset_to_test->parent()) !=
187 parent_rulesets_.end()) {
188 rulesets_with_parents_->insert(ruleset_to_test);
189 }
190 }
191
192 private:
193 const Rulesets& parent_rulesets_;
194 Rulesets* rulesets_with_parents_;
195 };
196
197 // Validates AddressData structure.
198 class AddressValidatorImpl : public AddressValidator {
199 public:
200 // Takes ownership of |downloader| and |storage|. Does not take ownership of
201 // |load_rules_delegate|.
202 AddressValidatorImpl(const std::string& validation_data_url,
203 scoped_ptr<Downloader> downloader,
204 scoped_ptr<Storage> storage,
205 LoadRulesDelegate* load_rules_delegate)
206 : aggregator_(scoped_ptr<Retriever>(new Retriever(
207 validation_data_url,
208 downloader.Pass(),
209 storage.Pass()))),
210 load_rules_delegate_(load_rules_delegate),
211 loading_rules_(),
212 rules_() {}
213
214 virtual ~AddressValidatorImpl() {
215 STLDeleteValues(&rules_);
216 }
217
218 // AddressValidator implementation.
219 virtual void LoadRules(const std::string& country_code) {
220 if (rules_.find(country_code) == rules_.end() &&
221 loading_rules_.find(country_code) == loading_rules_.end()) {
222 loading_rules_.insert(country_code);
223 aggregator_.AggregateRules(
224 country_code,
225 BuildScopedPtrCallback(this, &AddressValidatorImpl::OnRulesLoaded));
226 }
227 }
228
229 // AddressValidator implementation.
230 virtual Status ValidateAddress(
231 const AddressData& address,
232 const AddressProblemFilter& filter,
233 AddressProblems* problems) const {
234 std::map<std::string, Ruleset*>::const_iterator ruleset_it =
235 rules_.find(address.region_code);
236
237 // We can still validate the required fields even if the full ruleset isn't
238 // ready.
239 if (ruleset_it == rules_.end()) {
240 if (problems != NULL) {
241 Rule rule;
242 rule.CopyFrom(Rule::GetDefault());
243 if (rule.ParseSerializedRule(
244 RegionDataConstants::GetRegionData(address.region_code))) {
245 EnforceRequiredFields(rule, address, filter, problems);
246 }
247 }
248
249 return loading_rules_.find(address.region_code) != loading_rules_.end()
250 ? RULES_NOT_READY
251 : RULES_UNAVAILABLE;
252 }
253
254 if (problems == NULL) {
255 return SUCCESS;
256 }
257
258 const Ruleset* ruleset = ruleset_it->second;
259 assert(ruleset != NULL);
260 const Rule& country_rule =
261 ruleset->GetLanguageCodeRule(address.language_code);
262 EnforceRequiredFields(country_rule, address, filter, problems);
263
264 // Validate general postal code format. A country-level rule specifies the
265 // regular expression for the whole postal code.
266 if (!address.postal_code.empty() &&
267 !country_rule.GetPostalCodeFormat().empty() &&
268 FilterAllows(filter,
269 POSTAL_CODE,
270 AddressProblem::UNRECOGNIZED_FORMAT) &&
271 !RE2::FullMatch(
272 address.postal_code, country_rule.GetPostalCodeFormat())) {
273 problems->push_back(AddressProblem(
274 POSTAL_CODE,
275 AddressProblem::UNRECOGNIZED_FORMAT,
276 GetInvalidFieldMessageId(country_rule, POSTAL_CODE)));
277 }
278
279 while (ruleset != NULL) {
280 const Rule& rule = ruleset->GetLanguageCodeRule(address.language_code);
281
282 // Validate the field values, e.g. state names in US.
283 AddressField sub_field_type =
284 static_cast<AddressField>(ruleset->field() + 1);
285 std::string sub_key;
286 const std::string& user_input = address.GetFieldValue(sub_field_type);
287 if (!user_input.empty() &&
288 FilterAllows(filter, sub_field_type, AddressProblem::UNKNOWN_VALUE) &&
289 !rule.CanonicalizeSubKey(user_input, false, &sub_key)) {
290 problems->push_back(AddressProblem(
291 sub_field_type,
292 AddressProblem::UNKNOWN_VALUE,
293 GetInvalidFieldMessageId(country_rule, sub_field_type)));
294 }
295
296 // Validate sub-region specific postal code format. A sub-region specifies
297 // the regular expression for a prefix of the postal code.
298 if (ruleset->field() > COUNTRY &&
299 !address.postal_code.empty() &&
300 !rule.GetPostalCodeFormat().empty() &&
301 FilterAllows(filter,
302 POSTAL_CODE,
303 AddressProblem::MISMATCHING_VALUE) &&
304 !ValueMatchesPrefixRegex(
305 address.postal_code, rule.GetPostalCodeFormat())) {
306 problems->push_back(AddressProblem(
307 POSTAL_CODE,
308 AddressProblem::MISMATCHING_VALUE,
309 GetInvalidFieldMessageId(country_rule, POSTAL_CODE)));
310 }
311
312 ruleset = ruleset->GetSubRegionRuleset(sub_key);
313 }
314
315 return SUCCESS;
316 }
317
318 // AddressValidator implementation.
319 virtual Status GetSuggestions(const AddressData& user_input,
320 AddressField focused_field,
321 size_t suggestions_limit,
322 std::vector<AddressData>* suggestions) const {
323 std::map<std::string, Ruleset*>::const_iterator ruleset_it =
324 rules_.find(user_input.region_code);
325
326 if (ruleset_it == rules_.end()) {
327 return
328 loading_rules_.find(user_input.region_code) != loading_rules_.end()
329 ? RULES_NOT_READY
330 : RULES_UNAVAILABLE;
331 }
332
333 if (suggestions == NULL) {
334 return SUCCESS;
335 }
336 suggestions->clear();
337
338 assert(ruleset_it->second != NULL);
339
340 // Do not suggest anything if the user is typing in the field for which
341 // there's no validation data.
342 if (focused_field != POSTAL_CODE &&
343 (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) {
344 return SUCCESS;
345 }
346
347 // Do not suggest anything if the user input is empty.
348 if (user_input.GetFieldValue(focused_field).empty()) {
349 return SUCCESS;
350 }
351
352 const Ruleset& country_ruleset = *ruleset_it->second;
353 const Rule& country_rule =
354 country_ruleset.GetLanguageCodeRule(user_input.language_code);
355
356 // Do not suggest anything if the user is typing the postal code that is not
357 // valid for the country.
358 if (!user_input.postal_code.empty() &&
359 focused_field == POSTAL_CODE &&
360 !country_rule.GetPostalCodeFormat().empty() &&
361 !ValueMatchesPrefixRegex(
362 user_input.postal_code, country_rule.GetPostalCodeFormat())) {
363 return SUCCESS;
364 }
365
366 // Initialize the prefix search index lazily.
367 if (!ruleset_it->second->prefix_search_index_ready()) {
368 ruleset_it->second->BuildPrefixSearchIndex();
369 }
370
371 if (focused_field != POSTAL_CODE &&
372 focused_field > country_ruleset.deepest_ruleset_level()) {
373 return SUCCESS;
374 }
375
376 // Determine the most specific address field that can be suggested.
377 AddressField suggestion_field = focused_field != POSTAL_CODE
378 ? focused_field : DEPENDENT_LOCALITY;
379 if (suggestion_field > country_ruleset.deepest_ruleset_level()) {
380 suggestion_field = country_ruleset.deepest_ruleset_level();
381 }
382 if (focused_field != POSTAL_CODE) {
383 while (user_input.GetFieldValue(suggestion_field).empty() &&
384 suggestion_field > ADMIN_AREA) {
385 suggestion_field = static_cast<AddressField>(suggestion_field - 1);
386 }
387 }
388
389 // Find all rulesets that match user input.
390 AddressFieldRulesets rulesets;
391 for (int i = ADMIN_AREA; i <= suggestion_field; ++i) {
392 for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) {
393 AddressField address_field = static_cast<AddressField>(i);
394 Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j);
395
396 // Find all rulesets at |address_field| level whose |rule_field| starts
397 // with user input value.
398 country_ruleset.FindRulesetsByPrefix(
399 user_input.language_code, address_field, rule_field,
400 user_input.GetFieldValue(address_field),
401 &rulesets[address_field][rule_field]);
402
403 // Filter out the rulesets whose parents do not match the user input.
404 if (address_field > ADMIN_AREA) {
405 AddressField parent_field =
406 static_cast<AddressField>(address_field - 1);
407 Rulesets rulesets_with_parents;
408 std::for_each(
409 rulesets[address_field][rule_field].begin(),
410 rulesets[address_field][rule_field].end(),
411 ParentedRulesetCollector(rulesets[parent_field][rule_field],
412 &rulesets_with_parents));
413 rulesets[address_field][rule_field].swap(rulesets_with_parents);
414 }
415 }
416 }
417
418 // Determine the fields in the rules that match the user input. This
419 // operation converts a map of Rule::IdentityField value -> Ruleset into a
420 // map of Ruleset -> Rule::IdentityField bitset.
421 std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets;
422 for (IdentityFieldRulesets::const_iterator rule_field_it =
423 rulesets[suggestion_field].begin();
424 rule_field_it != rulesets[suggestion_field].end();
425 ++rule_field_it) {
426 const Rule::IdentityField rule_identity_field = rule_field_it->first;
427 for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin();
428 ruleset_it != rule_field_it->second.end();
429 ++ruleset_it) {
430 suggestion_rulesets[*ruleset_it].set(rule_identity_field);
431 }
432 }
433
434 // Generate suggestions based on the rulesets. Use a Rule::IdentityField
435 // from the bitset to generate address field values.
436 for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator
437 suggestion_it = suggestion_rulesets.begin();
438 suggestion_it != suggestion_rulesets.end();
439 ++suggestion_it) {
440 const Ruleset& ruleset = *suggestion_it->first;
441 const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code);
442 const MatchingRuleFields& matching_rule_fields = suggestion_it->second;
443
444 // Do not suggest this region if the postal code in user input does not
445 // match it.
446 if (!user_input.postal_code.empty() &&
447 !rule.GetPostalCodeFormat().empty() &&
448 !ValueMatchesPrefixRegex(
449 user_input.postal_code, rule.GetPostalCodeFormat())) {
450 continue;
451 }
452
453 // Do not add more suggestions than |suggestions_limit|.
454 if (suggestions->size() >= suggestions_limit) {
455 suggestions->clear();
456 return SUCCESS;
457 }
458
459 // If the user's language is not one of the supported languages of a
460 // country that has latinized names for its regions, then prefer to
461 // suggest the latinized region names. If the user types in local script
462 // instead, then the local script names will be suggested.
463 Rule::IdentityField rule_field = Rule::KEY;
464 if (!country_rule.GetLanguage().empty() &&
465 country_rule.GetLanguage() != user_input.language_code &&
466 !rule.GetLatinName().empty() &&
467 matching_rule_fields.test(Rule::LATIN_NAME)) {
468 rule_field = Rule::LATIN_NAME;
469 } else if (matching_rule_fields.test(Rule::KEY)) {
470 rule_field = Rule::KEY;
471 } else if (matching_rule_fields.test(Rule::NAME)) {
472 rule_field = Rule::NAME;
473 } else if (matching_rule_fields.test(Rule::LATIN_NAME)) {
474 rule_field = Rule::LATIN_NAME;
475 } else {
476 assert(false);
477 }
478
479 AddressData suggestion;
480 suggestion.region_code = user_input.region_code;
481 suggestion.postal_code = user_input.postal_code;
482
483 // Traverse the tree of rulesets from the most specific |ruleset| to the
484 // country-wide "root" of the tree. Use the region names found at each of
485 // the levels of the ruleset tree to build the |suggestion|.
486 for (const Ruleset* suggestion_ruleset = &ruleset;
487 suggestion_ruleset->parent() != NULL;
488 suggestion_ruleset = suggestion_ruleset->parent()) {
489 const Rule& suggestion_rule =
490 suggestion_ruleset->GetLanguageCodeRule(user_input.language_code);
491 suggestion.SetFieldValue(suggestion_ruleset->field(),
492 suggestion_rule.GetIdentityField(rule_field));
493 }
494
495 suggestions->push_back(suggestion);
496 }
497
498 return SUCCESS;
499 }
500
501 // AddressValidator implementation.
502 virtual bool CanonicalizeAdministrativeArea(AddressData* address_data) const {
503 std::map<std::string, Ruleset*>::const_iterator ruleset_it =
504 rules_.find(address_data->region_code);
505 if (ruleset_it == rules_.end()) {
506 return false;
507 }
508 const Rule& rule =
509 ruleset_it->second->GetLanguageCodeRule(address_data->language_code);
510
511 return rule.CanonicalizeSubKey(address_data->administrative_area,
512 true, // Keep input latin.
513 &address_data->administrative_area);
514 }
515
516 private:
517 // Called when CountryRulesAggregator::AggregateRules loads the |ruleset| for
518 // the |country_code|.
519 void OnRulesLoaded(bool success,
520 const std::string& country_code,
521 scoped_ptr<Ruleset> ruleset) {
522 assert(rules_.find(country_code) == rules_.end());
523 loading_rules_.erase(country_code);
524 if (success) {
525 assert(ruleset != NULL);
526 assert(ruleset->field() == COUNTRY);
527 rules_[country_code] = ruleset.release();
528 }
529 if (load_rules_delegate_ != NULL) {
530 load_rules_delegate_->OnAddressValidationRulesLoaded(
531 country_code, success);
532 }
533 }
534
535 // Adds problems for just the required fields portion of |country_rule|.
536 void EnforceRequiredFields(const Rule& country_rule,
537 const AddressData& address,
538 const AddressProblemFilter& filter,
539 AddressProblems* problems) const {
540 assert(problems != NULL);
541 for (std::vector<AddressField>::const_iterator
542 field_it = country_rule.GetRequired().begin();
543 field_it != country_rule.GetRequired().end();
544 ++field_it) {
545 bool field_empty = *field_it != STREET_ADDRESS
546 ? address.GetFieldValue(*field_it).empty()
547 : IsEmptyStreetAddress(address.address_line);
548 if (field_empty &&
549 FilterAllows(
550 filter, *field_it, AddressProblem::MISSING_REQUIRED_FIELD)) {
551 problems->push_back(AddressProblem(
552 *field_it,
553 AddressProblem::MISSING_REQUIRED_FIELD,
554 IDS_LIBADDRESSINPUT_I18N_MISSING_REQUIRED_FIELD));
555 }
556 }
557 }
558
559 // Loads the ruleset for a country code.
560 CountryRulesAggregator aggregator_;
561
562 // An optional delegate to be invoked when a ruleset finishes loading.
563 LoadRulesDelegate* load_rules_delegate_;
564
565 // A set of country codes for which a ruleset is being loaded.
566 std::set<std::string> loading_rules_;
567
568 // A mapping of a country code to the owned ruleset for that country code.
569 std::map<std::string, Ruleset*> rules_;
570
571 DISALLOW_COPY_AND_ASSIGN(AddressValidatorImpl);
572 };
573
574 } // namespace
575
576 AddressValidator::~AddressValidator() {}
577
578 // static
579 scoped_ptr<AddressValidator> AddressValidator::Build(
580 scoped_ptr<Downloader> downloader,
581 scoped_ptr<Storage> storage,
582 LoadRulesDelegate* load_rules_delegate) {
583 return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
584 VALIDATION_DATA_URL, downloader.Pass(), storage.Pass(),
585 load_rules_delegate));
586 }
587
588 scoped_ptr<AddressValidator> BuildAddressValidatorForTesting(
589 const std::string& validation_data_url,
590 scoped_ptr<Downloader> downloader,
591 scoped_ptr<Storage> storage,
592 LoadRulesDelegate* load_rules_delegate) {
593 return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
594 validation_data_url, downloader.Pass(), storage.Pass(),
595 load_rules_delegate));
596 }
597
598 } // namespace addressinput
599 } // namespace i18n
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698