Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(514)

Side by Side Diff: content/common/android/address_parser_internal.cc

Issue 2803163002: Move address parser and prefixes to android_webview/. (Closed)
Patch Set: Bring back ContentViewStatics import Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/common/android/address_parser_internal.h"
6
7 #include <bitset>
8
9 #include "base/logging.h"
10 #include "base/macros.h"
11 #include "base/strings/string_util.h"
12
13 namespace {
14
// Number of digits in a plain five-digit zip code. IsZipValid() also uses it
// as the position at which the hyphen of a Zip Plus 4 code is allowed.
const size_t kZipDigits = 5;

// Number of digits for a valid zip code in the Zip Plus 4 format. The hyphen
// is not counted here; IsZipValid() adds one to this value for it.
const size_t kZipPlus4Digits = 9;

// Maximum number of digits of a house number, including possible hyphens
// (HouseNumberParser::Parse() increments its digit counter for each accepted
// hyphen as well as for each digit).
const size_t kMaxHouseDigits = 5;
23
24 base::char16 SafePreviousChar(const base::string16::const_iterator& it,
25 const base::string16::const_iterator& begin) {
26 if (it == begin)
27 return ' ';
28 return *(it - 1);
29 }
30
31 base::char16 SafeNextChar(const base::string16::const_iterator& it,
32 const base::string16::const_iterator& end) {
33 if (it == end)
34 return ' ';
35 return *(it + 1);
36 }
37
38 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin,
39 base::string16::const_iterator word_end, const char* ascii_to_match) {
40 for (base::string16::const_iterator it = word_begin; it != word_end;
41 ++it, ++ascii_to_match) {
42 if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
43 return false;
44 }
45 return *ascii_to_match == 0 || *ascii_to_match == ' ';
46 }
47
48 bool LowerCaseEqualsASCIIWithPlural(base::string16::const_iterator word_begin,
49 base::string16::const_iterator word_end, const char* ascii_to_match,
50 bool allow_plural) {
51 for (base::string16::const_iterator it = word_begin; it != word_end;
52 ++it, ++ascii_to_match) {
53 if (!*ascii_to_match && allow_plural && *it == 's' && it + 1 == word_end)
54 return true;
55
56 if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
57 return false;
58 }
59 return *ascii_to_match == 0;
60 }
61
62 } // anonymous namespace
63
64 namespace content {
65
66 namespace address_parser {
67
68 namespace internal {
69
70 Word::Word() {
71 }
72
73 Word::Word(const base::string16::const_iterator& begin,
74 const base::string16::const_iterator& end)
75 : begin(begin),
76 end(end) {
77 DCHECK(begin <= end);
78 }
79
// Copy constructor: member-wise copy of the |begin| and |end| iterators.
Word::Word(const Word& other) = default;
81
82 HouseNumberParser::HouseNumberParser() {
83 }
84
85 bool HouseNumberParser::IsPreDelimiter(base::char16 character) {
86 return character == ':' || IsPostDelimiter(character);
87 }
88
89 bool HouseNumberParser::IsPostDelimiter(base::char16 character) {
90 return base::IsUnicodeWhitespace(character) || strchr(",\"'", character);
91 }
92
93 void HouseNumberParser::RestartOnNextDelimiter() {
94 ResetState();
95 for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {}
96 }
97
98 void HouseNumberParser::AcceptChars(size_t num_chars) {
99 size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)),
100 num_chars);
101 it_ += offset;
102 result_chars_ += offset;
103 }
104
105 void HouseNumberParser::SkipChars(size_t num_chars) {
106 it_ += std::min(static_cast<size_t>(std::distance(it_, end_)), num_chars);
107 }
108
109 void HouseNumberParser::ResetState() {
110 num_digits_ = 0;
111 result_chars_ = 0;
112 }
113
114 bool HouseNumberParser::CheckFinished(Word* word) const {
115 // There should always be a number after a hyphen.
116 if (result_chars_ == 0 || SafePreviousChar(it_, begin_) == '-')
117 return false;
118
119 if (word) {
120 word->begin = it_ - result_chars_;
121 word->end = it_;
122 }
123 return true;
124 }
125
// Scans [begin, end) for the first token that looks like a house number.
//
// Returns true when one is found; if |word| is non-null it is then set (by
// CheckFinished()) to the range of the accepted characters. Returns false
// when the input is exhausted without a complete candidate.
//
// A candidate is built character by character:
//  - ASCII digits and hyphens (a hyphen only after at least one digit), with
//    at most kMaxHouseDigits of them in total;
//  - the literal word "one" (any case) when nothing has been accepted yet;
//  - one letter directly after a digit when a post-delimiter follows ('12A');
//  - a two-letter ordinal suffix after a digit (1st, 2nd, 3rd, 7th, 11th...).
// Anything else discards the candidate and resumes at the next delimiter.
bool HouseNumberParser::Parse(
    const base::string16::const_iterator& begin,
    const base::string16::const_iterator& end, Word* word) {
  it_ = begin_ = begin;
  end_ = end;
  ResetState();

  // Iterations only used as a fail-safe against any buggy infinite loops.
  size_t iterations = 0;
  size_t max_iterations = end - begin + 1;
  for (; it_ != end_ && iterations < max_iterations; ++iterations) {

    // Word finished case.
    if (IsPostDelimiter(*it_)) {
      if (CheckFinished(word))
        return true;
      else if (result_chars_)
        // Something was accepted but it is not a complete number (it ends in
        // a hyphen), so throw it away.
        ResetState();

      SkipChars(1);
      continue;
    }

    // More digits. There should be no more after a letter was found.
    if (base::IsAsciiDigit(*it_)) {
      if (num_digits_ >= kMaxHouseDigits) {
        // Too long to be a house number; give up on this token.
        RestartOnNextDelimiter();
      } else {
        AcceptChars(1);
        ++num_digits_;
      }
      continue;
    }

    if (base::IsAsciiAlpha(*it_)) {
      // Handle special case 'one'.
      if (result_chars_ == 0) {
        // The bounds check keeps the three-character comparison inside the
        // input. A token starting with any other letter is abandoned.
        if (it_ + 3 <= end_ &&
            base::LowerCaseEqualsASCII(base::StringPiece16(it_, it_ + 3),
                                       "one"))
          AcceptChars(3);
        else
          RestartOnNextDelimiter();
        continue;
      }

      // There should be more than 1 character because of result_chars.
      DCHECK_GT(result_chars_, 0U);
      DCHECK(it_ != begin_);
      base::char16 previous = SafePreviousChar(it_, begin_);
      // A letter is only acceptable directly after a digit.
      if (base::IsAsciiDigit(previous)) {
        // Check cases like '12A'.
        base::char16 next = SafeNextChar(it_, end_);
        if (IsPostDelimiter(next)) {
          AcceptChars(1);
          continue;
        }

        // Handle cases like 12a, 1st, 2nd, 3rd, 7th.
        if (base::IsAsciiAlpha(next)) {
          base::char16 last_digit = previous;
          base::char16 first_letter = base::ToLowerASCII(*it_);
          base::char16 second_letter = base::ToLowerASCII(next);
          // True only for a two-character number whose first digit is '1';
          // combined with the cases below this covers 11, 12 and 13, which
          // take 'th' instead of 'st'/'nd'/'rd'.
          bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' &&
              num_digits_ == 2;

          // Each successful case accepts the two suffix letters and then
          // 'continue's the enclosing for loop; a 'break' leaves the switch
          // and falls to the RestartOnNextDelimiter() below.
          switch (last_digit - '0') {
            case 1:
              if ((first_letter == 's' && second_letter == 't') ||
                  (first_letter == 't' && second_letter == 'h' && is_teen)) {
                AcceptChars(2);
                continue;
              }
              break;

            case 2:
              if ((first_letter == 'n' && second_letter == 'd') ||
                  (first_letter == 't' && second_letter == 'h' && is_teen)) {
                AcceptChars(2);
                continue;
              }
              break;

            case 3:
              if ((first_letter == 'r' && second_letter == 'd') ||
                  (first_letter == 't' && second_letter == 'h' && is_teen)) {
                AcceptChars(2);
                continue;
              }
              break;

            case 0:
              // Explicitly exclude '0th'.
              if (num_digits_ == 1)
                break;
              // Otherwise fall through on purpose: longer numbers ending in
              // 0 (10th, 20th...) take 'th' like 4-9.

            case 4:
            case 5:
            case 6:
            case 7:
            case 8:
            case 9:
              if (first_letter == 't' && second_letter == 'h') {
                AcceptChars(2);
                continue;
              }
              break;

            default:
              NOTREACHED();
          }
        }
      }

      // The letter did not fit any accepted pattern.
      RestartOnNextDelimiter();
      continue;
    }

    // A hyphen may continue a number but never start one. It counts toward
    // the kMaxHouseDigits limit.
    if (*it_ == '-' && num_digits_ > 0) {
      AcceptChars(1);
      ++num_digits_;
      continue;
    }

    // Any other character: abandon the token and step past the delimiter
    // that RestartOnNextDelimiter() stopped on.
    RestartOnNextDelimiter();
    SkipChars(1);
  }

  // The loop was stopped by the fail-safe rather than by reaching the end.
  if (iterations >= max_iterations)
    return false;

  // End of input: the trailing candidate, if any, may still be complete.
  return CheckFinished(word);
}
259
260 bool FindStateStartingInWord(WordList* words,
261 size_t state_first_word,
262 size_t* state_last_word,
263 String16Tokenizer* tokenizer,
264 size_t* state_index) {
265
266 // Bitmasks containing the allowed suffixes for 2-letter state codes.
267 static const int state_two_letter_suffix[23] = {
268 0x02060c00, // A followed by: [KLRSZ].
269 0x00000000, // B.
270 0x00084001, // C followed by: [AOT].
271 0x00000014, // D followed by: [CE].
272 0x00000000, // E.
273 0x00001800, // F followed by: [LM].
274 0x00100001, // G followed by: [AU].
275 0x00000100, // H followed by: [I].
276 0x00002809, // I followed by: [ADLN].
277 0x00000000, // J.
278 0x01040000, // K followed by: [SY].
279 0x00000001, // L followed by: [A].
280 0x000ce199, // M followed by: [ADEHINOPST].
281 0x0120129c, // N followed by: [CDEHJMVY].
282 0x00020480, // O followed by: [HKR].
283 0x00420001, // P followed by: [ARW].
284 0x00000000, // Q.
285 0x00000100, // R followed by: [I].
286 0x0000000c, // S followed by: [CD].
287 0x00802000, // T followed by: [NX].
288 0x00080000, // U followed by: [T].
289 0x00080101, // V followed by: [AIT].
290 0x01200101 // W followed by: [AIVY].
291 };
292
293 // Accumulative number of states for the 2-letter code indexed by the first.
294 static const int state_two_letter_accumulative[24] = {
295 0, 5, 5, 8, 10, 10, 12, 14,
296 15, 19, 19, 21, 22, 32, 40, 43,
297 46, 46, 47, 49, 51, 52, 55, 59
298 };
299
300 // State names sorted alphabetically with their lengths.
301 // There can be more than one possible name for a same state if desired.
302 static const struct StateNameInfo {
303 const char* string;
304 char first_word_length;
305 char length;
306 char state_index; // Relative to two-character code alphabetical order.
307 } state_names[59] = {
308 { "alabama", 7, 7, 1 }, { "alaska", 6, 6, 0 },
309 { "american samoa", 8, 14, 3 }, { "arizona", 7, 7, 4 },
310 { "arkansas", 8, 8, 2 },
311 { "california", 10, 10, 5 }, { "colorado", 8, 8, 6 },
312 { "connecticut", 11, 11, 7 }, { "delaware", 8, 8, 9 },
313 { "district of columbia", 8, 20, 8 },
314 { "federated states of micronesia", 9, 30, 11 }, { "florida", 7, 7, 10 },
315 { "guam", 4, 4, 13 }, { "georgia", 7, 7, 12 },
316 { "hawaii", 6, 6, 14 },
317 { "idaho", 5, 5, 16 }, { "illinois", 8, 8, 17 }, { "indiana", 7, 7, 18 },
318 { "iowa", 4, 4, 15 },
319 { "kansas", 6, 6, 19 }, { "kentucky", 8, 8, 20 },
320 { "louisiana", 9, 9, 21 },
321 { "maine", 5, 5, 24 }, { "marshall islands", 8, 16, 25 },
322 { "maryland", 8, 8, 23 }, { "massachusetts", 13, 13, 22 },
323 { "michigan", 8, 8, 26 }, { "minnesota", 9, 9, 27 },
324 { "mississippi", 11, 11, 30 }, { "missouri", 8, 8, 28 },
325 { "montana", 7, 7, 31 },
326 { "nebraska", 8, 8, 34 }, { "nevada", 6, 6, 38 },
327 { "new hampshire", 3, 13, 35 }, { "new jersey", 3, 10, 36 },
328 { "new mexico", 3, 10, 37 }, { "new york", 3, 8, 39 },
329 { "north carolina", 5, 14, 32 }, { "north dakota", 5, 12, 33 },
330 { "northern mariana islands", 8, 24, 29 },
331 { "ohio", 4, 4, 40 }, { "oklahoma", 8, 8, 41 }, { "oregon", 6, 6, 42 },
332 { "palau", 5, 5, 45 }, { "pennsylvania", 12, 12, 43 },
333 { "puerto rico", 6, 11, 44 },
334 { "rhode island", 5, 5, 46 },
335 { "south carolina", 5, 14, 47 }, { "south dakota", 5, 12, 48 },
336 { "tennessee", 9, 9, 49 }, { "texas", 5, 5, 50 },
337 { "utah", 4, 4, 51 },
338 { "vermont", 7, 7, 54 }, { "virgin islands", 6, 14, 53 },
339 { "virginia", 8, 8, 52 },
340 { "washington", 10, 10, 55 }, { "west virginia", 4, 13, 57 },
341 { "wisconsin", 9, 9, 56 }, { "wyoming", 7, 7, 58 }
342 };
343
344 // Accumulative number of states for sorted names indexed by the first letter.
345 // Required a different one since there are codes that don't share their
346 // first letter with the name of their state (MP = Northern Mariana Islands).
347 static const int state_names_accumulative[24] = {
348 0, 5, 5, 8, 10, 10, 12, 14,
349 15, 19, 19, 21, 22, 31, 40, 43,
350 46, 46, 47, 49, 51, 52, 55, 59
351 };
352
353 DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1],
354 static_cast<int>(arraysize(state_names)));
355
356 const Word& first_word = words->at(state_first_word);
357 int length = first_word.end - first_word.begin;
358 if (length < 2 || !base::IsAsciiAlpha(*first_word.begin))
359 return false;
360
361 // No state names start with x, y, z.
362 base::char16 first_letter = base::ToLowerASCII(*first_word.begin);
363 if (first_letter > 'w')
364 return false;
365
366 DCHECK(first_letter >= 'a');
367 int first_index = first_letter - 'a';
368
369 // Look for two-letter state names.
370 if (length == 2 && base::IsAsciiAlpha(*(first_word.begin + 1))) {
371 base::char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1));
372 DCHECK(second_letter >= 'a');
373
374 int second_index = second_letter - 'a';
375 if (!(state_two_letter_suffix[first_index] & (1 << second_index)))
376 return false;
377
378 std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] &
379 ((1 << second_index) - 1);
380 *state_last_word = state_first_word;
381 *state_index = state_two_letter_accumulative[first_index] +
382 previous_suffixes.count();
383 return true;
384 }
385
386 // Look for full state names by their first letter. Discard by length.
387 for (int state = state_names_accumulative[first_index];
388 state < state_names_accumulative[first_index + 1]; ++state) {
389 if (state_names[state].first_word_length != length)
390 continue;
391
392 bool state_match = false;
393 size_t state_word = state_first_word;
394 for (int pos = 0; true; ) {
395 if (!WordLowerCaseEqualsASCII(words->at(state_word).begin,
396 words->at(state_word).end, &state_names[state].string[pos]))
397 break;
398
399 pos += words->at(state_word).end - words->at(state_word).begin + 1;
400 if (pos >= state_names[state].length) {
401 state_match = true;
402 break;
403 }
404
405 // Ran out of words, extract more from the tokenizer.
406 if (++state_word == words->size()) {
407 do {
408 if (!tokenizer->GetNext())
409 break;
410 } while (tokenizer->token_is_delim());
411 words->push_back(
412 Word(tokenizer->token_begin(), tokenizer->token_end()));
413 }
414 }
415
416 if (state_match) {
417 *state_last_word = state_word;
418 *state_index = state_names[state].state_index;
419 return true;
420 }
421 }
422
423 return false;
424 }
425
426 bool IsZipValid(const Word& word, size_t state_index) {
427 size_t length = word.end - word.begin;
428 if (length != kZipDigits && length != kZipPlus4Digits + 1)
429 return false;
430
431 for (base::string16::const_iterator it = word.begin; it != word.end; ++it) {
432 size_t pos = it - word.begin;
433 if (base::IsAsciiDigit(*it) || (*it == '-' && pos == kZipDigits))
434 continue;
435 return false;
436 }
437 return IsZipValidForState(word, state_index);
438 }
439
440 bool IsZipValidForState(const Word& word, size_t state_index) {
441 // List of valid zip code ranges.
442 static const struct {
443 signed char low;
444 signed char high;
445 signed char exception1;
446 signed char exception2;
447 } zip_range[] = {
448 { 99, 99, -1, -1 }, // AK Alaska.
449 { 35, 36, -1, -1 }, // AL Alabama.
450 { 71, 72, -1, -1 }, // AR Arkansas.
451 { 96, 96, -1, -1 }, // AS American Samoa.
452 { 85, 86, -1, -1 }, // AZ Arizona.
453 { 90, 96, -1, -1 }, // CA California.
454 { 80, 81, -1, -1 }, // CO Colorado.
455 { 6, 6, -1, -1 }, // CT Connecticut.
456 { 20, 20, -1, -1 }, // DC District of Columbia.
457 { 19, 19, -1, -1 }, // DE Delaware.
458 { 32, 34, -1, -1 }, // FL Florida.
459 { 96, 96, -1, -1 }, // FM Federated States of Micronesia.
460 { 30, 31, -1, -1 }, // GA Georgia.
461 { 96, 96, -1, -1 }, // GU Guam.
462 { 96, 96, -1, -1 }, // HI Hawaii.
463 { 50, 52, -1, -1 }, // IA Iowa.
464 { 83, 83, -1, -1 }, // ID Idaho.
465 { 60, 62, -1, -1 }, // IL Illinois.
466 { 46, 47, -1, -1 }, // IN Indiana.
467 { 66, 67, 73, -1 }, // KS Kansas.
468 { 40, 42, -1, -1 }, // KY Kentucky.
469 { 70, 71, -1, -1 }, // LA Louisiana.
470 { 1, 2, -1, -1 }, // MA Massachusetts.
471 { 20, 21, -1, -1 }, // MD Maryland.
472 { 3, 4, -1, -1 }, // ME Maine.
473 { 96, 96, -1, -1 }, // MH Marshall Islands.
474 { 48, 49, -1, -1 }, // MI Michigan.
475 { 55, 56, -1, -1 }, // MN Minnesota.
476 { 63, 65, -1, -1 }, // MO Missouri.
477 { 96, 96, -1, -1 }, // MP Northern Mariana Islands.
478 { 38, 39, -1, -1 }, // MS Mississippi.
479 { 55, 56, -1, -1 }, // MT Montana.
480 { 27, 28, -1, -1 }, // NC North Carolina.
481 { 58, 58, -1, -1 }, // ND North Dakota.
482 { 68, 69, -1, -1 }, // NE Nebraska.
483 { 3, 4, -1, -1 }, // NH New Hampshire.
484 { 7, 8, -1, -1 }, // NJ New Jersey.
485 { 87, 88, 86, -1 }, // NM New Mexico.
486 { 88, 89, 96, -1 }, // NV Nevada.
487 { 10, 14, 0, 6 }, // NY New York.
488 { 43, 45, -1, -1 }, // OH Ohio.
489 { 73, 74, -1, -1 }, // OK Oklahoma.
490 { 97, 97, -1, -1 }, // OR Oregon.
491 { 15, 19, -1, -1 }, // PA Pennsylvania.
492 { 6, 6, 0, 9 }, // PR Puerto Rico.
493 { 96, 96, -1, -1 }, // PW Palau.
494 { 2, 2, -1, -1 }, // RI Rhode Island.
495 { 29, 29, -1, -1 }, // SC South Carolina.
496 { 57, 57, -1, -1 }, // SD South Dakota.
497 { 37, 38, -1, -1 }, // TN Tennessee.
498 { 75, 79, 87, 88 }, // TX Texas.
499 { 84, 84, -1, -1 }, // UT Utah.
500 { 22, 24, 20, -1 }, // VA Virginia.
501 { 6, 9, -1, -1 }, // VI Virgin Islands.
502 { 5, 5, -1, -1 }, // VT Vermont.
503 { 98, 99, -1, -1 }, // WA Washington.
504 { 53, 54, -1, -1 }, // WI Wisconsin.
505 { 24, 26, -1, -1 }, // WV West Virginia.
506 { 82, 83, -1, -1 } // WY Wyoming.
507 };
508
509 // Zip numeric value for the first two characters.
510 DCHECK(word.begin != word.end);
511 DCHECK(base::IsAsciiDigit(*word.begin));
512 DCHECK(base::IsAsciiDigit(*(word.begin + 1)));
513 int zip_prefix = (*word.begin - '0') * 10 + (*(word.begin + 1) - '0');
514
515 if ((zip_prefix >= zip_range[state_index].low &&
516 zip_prefix <= zip_range[state_index].high) ||
517 zip_prefix == zip_range[state_index].exception1 ||
518 zip_prefix == zip_range[state_index].exception2) {
519 return true;
520 }
521 return false;
522 }
523
524 bool IsValidLocationName(const Word& word) {
525 // Supported location names sorted alphabetically and grouped by first letter.
526 static const struct LocationNameInfo {
527 const char* string;
528 char length;
529 bool allow_plural;
530 } location_names[159] = {
531 { "alley", 5, false }, { "annex", 5, false }, { "arcade", 6, false },
532 { "ave", 3, false }, { "ave.", 4, false }, { "avenue", 6, false },
533 { "alameda", 7, false },
534 { "bayou", 5, false }, { "beach", 5, false }, { "bend", 4, false },
535 { "bluff", 5, true }, { "bottom", 6, false }, { "boulevard", 9, false },
536 { "branch", 6, false }, { "bridge", 6, false }, { "brook", 5, true },
537 { "burg", 4, true }, { "bypass", 6, false }, { "broadway", 8, false },
538 { "camino", 6, false }, { "camp", 4, false }, { "canyon", 6, false },
539 { "cape", 4, false }, { "causeway", 8, false }, { "center", 6, true },
540 { "circle", 6, true }, { "cliff", 5, true }, { "club", 4, false },
541 { "common", 6, false }, { "corner", 6, true }, { "course", 6, false },
542 { "court", 5, true }, { "cove", 4, true }, { "creek", 5, false },
543 { "crescent", 8, false }, { "crest", 5, false }, { "crossing", 8, false },
544 { "crossroad", 9, false }, { "curve", 5, false }, { "circulo", 7, false },
545 { "dale", 4, false }, { "dam", 3, false }, { "divide", 6, false },
546 { "drive", 5, true },
547 { "estate", 6, true }, { "expressway", 10, false },
548 { "extension", 9, true },
549 { "fall", 4, true }, { "ferry", 5, false }, { "field", 5, true },
550 { "flat", 4, true }, { "ford", 4, true }, { "forest", 6, false },
551 { "forge", 5, true }, { "fork", 4, true }, { "fort", 4, false },
552 { "freeway", 7, false },
553 { "garden", 6, true }, { "gateway", 7, false }, { "glen", 4, true },
554 { "green", 5, true }, { "grove", 5, true },
555 { "harbor", 6, true }, { "haven", 5, false }, { "heights", 7, false },
556 { "highway", 7, false }, { "hill", 4, true }, { "hollow", 6, false },
557 { "inlet", 5, false }, { "island", 6, true }, { "isle", 4, false },
558 { "junction", 8, true },
559 { "key", 3, true }, { "knoll", 5, true },
560 { "lake", 4, true }, { "land", 4, false }, { "landing", 7, false },
561 { "lane", 4, false }, { "light", 5, true }, { "loaf", 4, false },
562 { "lock", 4, true }, { "lodge", 5, false }, { "loop", 4, false },
563 { "mall", 4, false }, { "manor", 5, true }, { "meadow", 6, true },
564 { "mews", 4, false }, { "mill", 4, true }, { "mission", 7, false },
565 { "motorway", 8, false }, { "mount", 5, false }, { "mountain", 8, true },
566 { "neck", 4, false },
567 { "orchard", 7, false }, { "oval", 4, false }, { "overpass", 8, false },
568 { "park", 4, true }, { "parkway", 7, true }, { "pass", 4, false },
569 { "passage", 7, false }, { "path", 4, false }, { "pike", 4, false },
570 { "pine", 4, true }, { "plain", 5, true }, { "plaza", 5, false },
571 { "point", 5, true }, { "port", 4, true }, { "prairie", 7, false },
572 { "privada", 7, false },
573 { "radial", 6, false }, { "ramp", 4, false }, { "ranch", 5, false },
574 { "rapid", 5, true }, { "rd", 2, false }, { "rd.", 3, false },
575 { "rest", 4, false }, { "ridge", 5, true }, { "river", 5, false },
576 { "road", 4, true }, { "route", 5, false }, { "row", 3, false },
577 { "rue", 3, false }, { "run", 3, false },
578 { "shoal", 5, true }, { "shore", 5, true }, { "skyway", 6, false },
579 { "spring", 6, true }, { "spur", 4, true }, { "square", 6, true },
580 { "station", 7, false }, { "stravenue", 9, false }, { "stream", 6, false },
581 { "st", 2, false }, { "st.", 3, false }, { "street", 6, true },
582 { "summit", 6, false }, { "speedway", 8, false },
583 { "terrace", 7, false }, { "throughway", 10, false }, { "trace", 5, false },
584 { "track", 5, false }, { "trafficway", 10, false }, { "trail", 5, false },
585 { "tunnel", 6, false }, { "turnpike", 8, false },
586 { "underpass", 9, false }, { "union", 5, true },
587 { "valley", 6, true }, { "viaduct", 7, false }, { "view", 4, true },
588 { "village", 7, true }, { "ville", 5, false }, { "vista", 5, false },
589 { "walk", 4, true }, { "wall", 4, false }, { "way", 3, true },
590 { "well", 4, true },
591 { "xing", 4, false }, { "xrd", 3, false }
592 };
593
594 // Accumulative number of location names for each starting letter.
595 static const int location_names_accumulative[25] = {
596 0, 7, 19, 40, 44,
597 47, 57, 62, 68, 71,
598 72, 74, 83, 92, 93,
599 96, 109, 109, 123, 137,
600 145, 147, 153, 157, 159
601 };
602
603 DCHECK_EQ(
604 location_names_accumulative[arraysize(location_names_accumulative) - 1],
605 static_cast<int>(arraysize(location_names)));
606
607 if (!base::IsAsciiAlpha(*word.begin))
608 return false;
609
610 // No location names start with y, z.
611 base::char16 first_letter = base::ToLowerASCII(*word.begin);
612 if (first_letter > 'x')
613 return false;
614
615 DCHECK(first_letter >= 'a');
616 int index = first_letter - 'a';
617 int length = std::distance(word.begin, word.end);
618 for (int i = location_names_accumulative[index];
619 i < location_names_accumulative[index + 1]; ++i) {
620 if (location_names[i].length != length &&
621 (location_names[i].allow_plural &&
622 location_names[i].length + 1 != length)) {
623 continue;
624 }
625
626 if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end,
627 location_names[i].string,
628 location_names[i].allow_plural)) {
629 return true;
630 }
631 }
632
633 return false;
634 }
635
636 } // namespace internal
637
638 } // namespace address_parser
639
640 } // namespace content
OLDNEW
« no previous file with comments | « content/common/android/address_parser_internal.h ('k') | content/common/android/address_parser_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698