OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/common/android/address_parser_internal.h" | 5 #include "content/common/android/address_parser_internal.h" |
6 | 6 |
7 #include <bitset> | 7 #include <bitset> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
11 | 11 |
12 namespace { | 12 namespace { |
13 | 13 |
14 // Number of digits for a valid zip code. | 14 // Number of digits for a valid zip code. |
15 const size_t kZipDigits = 5; | 15 const size_t kZipDigits = 5; |
16 | 16 |
17 // Number of digits for a valid zip code in the Zip Plus 4 format. | 17 // Number of digits for a valid zip code in the Zip Plus 4 format. |
18 const size_t kZipPlus4Digits = 9; | 18 const size_t kZipPlus4Digits = 9; |
19 | 19 |
20 // Maximum number of digits of a house number, including possible hyphens. | 20 // Maximum number of digits of a house number, including possible hyphens. |
21 const size_t kMaxHouseDigits = 5; | 21 const size_t kMaxHouseDigits = 5; |
22 | 22 |
23 char16 SafePreviousChar(const base::string16::const_iterator& it, | 23 base::char16 SafePreviousChar(const base::string16::const_iterator& it, |
24 const base::string16::const_iterator& begin) { | 24 const base::string16::const_iterator& begin) { |
25 if (it == begin) | 25 if (it == begin) |
26 return ' '; | 26 return ' '; |
27 return *(it - 1); | 27 return *(it - 1); |
28 } | 28 } |
29 | 29 |
30 char16 SafeNextChar(const base::string16::const_iterator& it, | 30 base::char16 SafeNextChar(const base::string16::const_iterator& it, |
31 const base::string16::const_iterator& end) { | 31 const base::string16::const_iterator& end) { |
32 if (it == end) | 32 if (it == end) |
33 return ' '; | 33 return ' '; |
34 return *(it + 1); | 34 return *(it + 1); |
35 } | 35 } |
36 | 36 |
37 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin, | 37 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin, |
38 base::string16::const_iterator word_end, const char* ascii_to_match) { | 38 base::string16::const_iterator word_end, const char* ascii_to_match) { |
39 for (base::string16::const_iterator it = word_begin; it != word_end; | 39 for (base::string16::const_iterator it = word_begin; it != word_end; |
40 ++it, ++ascii_to_match) { | 40 ++it, ++ascii_to_match) { |
(...skipping 25 matching lines...) Expand all Loading... |
66 | 66 |
67 namespace internal { | 67 namespace internal { |
68 | 68 |
69 Word::Word(const base::string16::const_iterator& begin, | 69 Word::Word(const base::string16::const_iterator& begin, |
70 const base::string16::const_iterator& end) | 70 const base::string16::const_iterator& end) |
71 : begin(begin), | 71 : begin(begin), |
72 end(end) { | 72 end(end) { |
73 DCHECK(begin <= end); | 73 DCHECK(begin <= end); |
74 } | 74 } |
75 | 75 |
76 bool HouseNumberParser::IsPreDelimiter(char16 character) { | 76 bool HouseNumberParser::IsPreDelimiter(base::char16 character) { |
77 return character == ':' || IsPostDelimiter(character); | 77 return character == ':' || IsPostDelimiter(character); |
78 } | 78 } |
79 | 79 |
80 bool HouseNumberParser::IsPostDelimiter(char16 character) { | 80 bool HouseNumberParser::IsPostDelimiter(base::char16 character) { |
81 return IsWhitespace(character) || strchr(",\"'", character); | 81 return IsWhitespace(character) || strchr(",\"'", character); |
82 } | 82 } |
83 | 83 |
84 void HouseNumberParser::RestartOnNextDelimiter() { | 84 void HouseNumberParser::RestartOnNextDelimiter() { |
85 ResetState(); | 85 ResetState(); |
86 for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {} | 86 for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {} |
87 } | 87 } |
88 | 88 |
89 void HouseNumberParser::AcceptChars(size_t num_chars) { | 89 void HouseNumberParser::AcceptChars(size_t num_chars) { |
90 size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)), | 90 size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)), |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
154 if (it_ + 3 <= end_ && LowerCaseEqualsASCII(it_, it_ + 3, "one")) | 154 if (it_ + 3 <= end_ && LowerCaseEqualsASCII(it_, it_ + 3, "one")) |
155 AcceptChars(3); | 155 AcceptChars(3); |
156 else | 156 else |
157 RestartOnNextDelimiter(); | 157 RestartOnNextDelimiter(); |
158 continue; | 158 continue; |
159 } | 159 } |
160 | 160 |
161 // There should be more than 1 character because of result_chars. | 161 // There should be more than 1 character because of result_chars. |
162 DCHECK_GT(result_chars_, 0U); | 162 DCHECK_GT(result_chars_, 0U); |
163 DCHECK(it_ != begin_); | 163 DCHECK(it_ != begin_); |
164 char16 previous = SafePreviousChar(it_, begin_); | 164 base::char16 previous = SafePreviousChar(it_, begin_); |
165 if (IsAsciiDigit(previous)) { | 165 if (IsAsciiDigit(previous)) { |
166 // Check cases like '12A'. | 166 // Check cases like '12A'. |
167 char16 next = SafeNextChar(it_, end_); | 167 base::char16 next = SafeNextChar(it_, end_); |
168 if (IsPostDelimiter(next)) { | 168 if (IsPostDelimiter(next)) { |
169 AcceptChars(1); | 169 AcceptChars(1); |
170 continue; | 170 continue; |
171 } | 171 } |
172 | 172 |
173 // Handle cases like 12a, 1st, 2nd, 3rd, 7th. | 173 // Handle cases like 12a, 1st, 2nd, 3rd, 7th. |
174 if (IsAsciiAlpha(next)) { | 174 if (IsAsciiAlpha(next)) { |
175 char16 last_digit = previous; | 175 base::char16 last_digit = previous; |
176 char16 first_letter = base::ToLowerASCII(*it_); | 176 base::char16 first_letter = base::ToLowerASCII(*it_); |
177 char16 second_letter = base::ToLowerASCII(next); | 177 base::char16 second_letter = base::ToLowerASCII(next); |
178 bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' && | 178 bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' && |
179 num_digits_ == 2; | 179 num_digits_ == 2; |
180 | 180 |
181 switch (last_digit - '0') { | 181 switch (last_digit - '0') { |
182 case 1: | 182 case 1: |
183 if ((first_letter == 's' && second_letter == 't') || | 183 if ((first_letter == 's' && second_letter == 't') || |
184 (first_letter == 't' && second_letter == 'h' && is_teen)) { | 184 (first_letter == 't' && second_letter == 'h' && is_teen)) { |
185 AcceptChars(2); | 185 AcceptChars(2); |
186 continue; | 186 continue; |
187 } | 187 } |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
341 | 341 |
342 DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1], | 342 DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1], |
343 static_cast<int>(ARRAYSIZE_UNSAFE(state_names))); | 343 static_cast<int>(ARRAYSIZE_UNSAFE(state_names))); |
344 | 344 |
345 const Word& first_word = words->at(state_first_word); | 345 const Word& first_word = words->at(state_first_word); |
346 int length = first_word.end - first_word.begin; | 346 int length = first_word.end - first_word.begin; |
347 if (length < 2 || !IsAsciiAlpha(*first_word.begin)) | 347 if (length < 2 || !IsAsciiAlpha(*first_word.begin)) |
348 return false; | 348 return false; |
349 | 349 |
350 // No state names start with x, y, z. | 350 // No state names start with x, y, z. |
351 char16 first_letter = base::ToLowerASCII(*first_word.begin); | 351 base::char16 first_letter = base::ToLowerASCII(*first_word.begin); |
352 if (first_letter > 'w') | 352 if (first_letter > 'w') |
353 return false; | 353 return false; |
354 | 354 |
355 DCHECK(first_letter >= 'a'); | 355 DCHECK(first_letter >= 'a'); |
356 int first_index = first_letter - 'a'; | 356 int first_index = first_letter - 'a'; |
357 | 357 |
358 // Look for two-letter state names. | 358 // Look for two-letter state names. |
359 if (length == 2 && IsAsciiAlpha(*(first_word.begin + 1))) { | 359 if (length == 2 && IsAsciiAlpha(*(first_word.begin + 1))) { |
360 char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1)); | 360 base::char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1)); |
361 DCHECK(second_letter >= 'a'); | 361 DCHECK(second_letter >= 'a'); |
362 | 362 |
363 int second_index = second_letter - 'a'; | 363 int second_index = second_letter - 'a'; |
364 if (!(state_two_letter_suffix[first_index] & (1 << second_index))) | 364 if (!(state_two_letter_suffix[first_index] & (1 << second_index))) |
365 return false; | 365 return false; |
366 | 366 |
367 std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] & | 367 std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] & |
368 ((1 << second_index) - 1); | 368 ((1 << second_index) - 1); |
369 *state_last_word = state_first_word; | 369 *state_last_word = state_first_word; |
370 *state_index = state_two_letter_accumulative[first_index] + | 370 *state_index = state_two_letter_accumulative[first_index] + |
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
589 }; | 589 }; |
590 | 590 |
591 DCHECK_EQ( | 591 DCHECK_EQ( |
592 location_names_accumulative[arraysize(location_names_accumulative) - 1], | 592 location_names_accumulative[arraysize(location_names_accumulative) - 1], |
593 static_cast<int>(ARRAYSIZE_UNSAFE(location_names))); | 593 static_cast<int>(ARRAYSIZE_UNSAFE(location_names))); |
594 | 594 |
595 if (!IsAsciiAlpha(*word.begin)) | 595 if (!IsAsciiAlpha(*word.begin)) |
596 return false; | 596 return false; |
597 | 597 |
598 // No location names start with y, z. | 598 // No location names start with y, z. |
599 char16 first_letter = base::ToLowerASCII(*word.begin); | 599 base::char16 first_letter = base::ToLowerASCII(*word.begin); |
600 if (first_letter > 'x') | 600 if (first_letter > 'x') |
601 return false; | 601 return false; |
602 | 602 |
603 DCHECK(first_letter >= 'a'); | 603 DCHECK(first_letter >= 'a'); |
604 int index = first_letter - 'a'; | 604 int index = first_letter - 'a'; |
605 int length = std::distance(word.begin, word.end); | 605 int length = std::distance(word.begin, word.end); |
606 for (int i = location_names_accumulative[index]; | 606 for (int i = location_names_accumulative[index]; |
607 i < location_names_accumulative[index + 1]; ++i) { | 607 i < location_names_accumulative[index + 1]; ++i) { |
608 if (location_names[i].length != length && | 608 if (location_names[i].length != length && |
609 (location_names[i].allow_plural && | 609 (location_names[i].allow_plural && |
610 location_names[i].length + 1 != length)) { | 610 location_names[i].length + 1 != length)) { |
611 continue; | 611 continue; |
612 } | 612 } |
613 | 613 |
614 if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end, | 614 if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end, |
615 location_names[i].string, | 615 location_names[i].string, |
616 location_names[i].allow_plural)) { | 616 location_names[i].allow_plural)) { |
617 return true; | 617 return true; |
618 } | 618 } |
619 } | 619 } |
620 | 620 |
621 return false; | 621 return false; |
622 } | 622 } |
623 | 623 |
624 } // namespace internal | 624 } // namespace internal |
625 | 625 |
626 } // namespace address_parser | 626 } // namespace address_parser |
627 | 627 |
628 } // namespace content | 628 } // namespace content |
OLD | NEW |