| Index: android_webview/native/address_parser.cc
|
| diff --git a/android_webview/native/address_parser.cc b/android_webview/native/address_parser.cc
|
| deleted file mode 100644
|
| index 62dc17f811e8303b57a5afd34544e4335b603dcd..0000000000000000000000000000000000000000
|
| --- a/android_webview/native/address_parser.cc
|
| +++ /dev/null
|
| @@ -1,225 +0,0 @@
|
| -// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "android_webview/native/address_parser.h"
|
| -
|
| -#include "android_webview/native/address_parser_internal.h"
|
| -#include "base/logging.h"
|
| -#include "base/strings/string_util.h"
|
| -
|
| -namespace {
|
| -
|
| -// Minimum number of words in an address after the house number
|
| -// before a state is expected to be found.
|
| -// A value too high can miss short addresses.
|
| -const size_t kMinAddressWords = 3;
|
| -
|
| -// Maximum number of words allowed in an address between the house number
|
| -// and the state, both not included.
|
| -const size_t kMaxAddressWords = 12;
|
| -
|
| -// Maximum number of lines allowed in an address between the house number
|
| -// and the state, both not included.
|
| -const size_t kMaxAddressLines = 5;
|
| -
|
| -// Maximum length allowed for any address word between the house number
|
| -// and the state, both not included.
|
| -const size_t kMaxAddressNameWordLength = 25;
|
| -
|
| -// Maximum number of words after the house number in which the location name
|
| -// should be found.
|
| -const size_t kMaxLocationNameDistance = 4;
|
| -
|
| -// Additional characters used as new line delimiters.
|
| -const base::char16 kNewlineDelimiters[] = {
|
| - '\n', ',', '*',
|
| - 0x2022, // Unicode bullet
|
| - 0,
|
| -};
|
| -
|
| -} // anonymous namespace
|
| -
|
| -namespace android_webview {
|
| -
|
| -namespace address_parser {
|
| -
|
| -using namespace internal;
|
| -
|
| -bool FindAddress(const base::string16& text, base::string16* address) {
|
| - size_t start, end;
|
| - if (FindAddress(text.begin(), text.end(), &start, &end)) {
|
| - size_t len = end >= start ? end - start : 0;
|
| - address->assign(text.substr(start, len));
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -bool FindAddress(const base::string16::const_iterator& begin,
|
| - const base::string16::const_iterator& end,
|
| - size_t* start_pos,
|
| - size_t* end_pos) {
|
| - HouseNumberParser house_number_parser;
|
| -
|
| - // Keep going through the input string until a potential house number is
|
| - // detected. Start tokenizing the following words to find a valid
|
| - // street name within a word range. Then, find a state name followed
|
| - // by a valid zip code for that state. Also keep a look for any other
|
| - // possible house numbers to continue from in case of no match and for
|
| - // state names not followed by a zip code (e.g. New York, NY 10000).
|
| - const base::string16 newline_delimiters = kNewlineDelimiters;
|
| - const base::string16 delimiters = base::kWhitespaceUTF16 + newline_delimiters;
|
| - for (base::string16::const_iterator it = begin; it != end;) {
|
| - Word house_number;
|
| - if (!house_number_parser.Parse(it, end, &house_number))
|
| - return false;
|
| -
|
| - String16Tokenizer tokenizer(house_number.end, end, delimiters);
|
| - tokenizer.set_options(String16Tokenizer::RETURN_DELIMS);
|
| -
|
| - WordList words;
|
| - words.push_back(house_number);
|
| -
|
| - bool found_location_name = false;
|
| - bool continue_on_house_number = true;
|
| - bool consecutive_house_numbers = true;
|
| - size_t next_house_number_word = 0;
|
| - size_t num_lines = 1;
|
| -
|
| - // Don't include the house number in the word count.
|
| - size_t next_word = 1;
|
| - for (; next_word <= kMaxAddressWords + 1; ++next_word) {
|
| - // Extract a new word from the tokenizer.
|
| - if (next_word == words.size()) {
|
| - do {
|
| - if (!tokenizer.GetNext())
|
| - return false;
|
| -
|
| - // Check the number of address lines.
|
| - if (tokenizer.token_is_delim() &&
|
| - newline_delimiters.find(*tokenizer.token_begin()) !=
|
| - base::string16::npos) {
|
| - ++num_lines;
|
| - }
|
| - } while (tokenizer.token_is_delim());
|
| -
|
| - if (num_lines > kMaxAddressLines)
|
| - break;
|
| -
|
| - words.push_back(Word(tokenizer.token_begin(), tokenizer.token_end()));
|
| - }
|
| -
|
| - // Check the word length. If too long, don't try to continue from
|
| - // the next house number as no address can hold this word.
|
| - const Word& current_word = words[next_word];
|
| - DCHECK_GT(std::distance(current_word.begin, current_word.end), 0);
|
| - size_t current_word_length =
|
| - std::distance(current_word.begin, current_word.end);
|
| - if (current_word_length > kMaxAddressNameWordLength) {
|
| - continue_on_house_number = false;
|
| - break;
|
| - }
|
| -
|
| - // Check if the new word is a valid house number.
|
| - if (house_number_parser.Parse(current_word.begin, current_word.end,
|
| - NULL)) {
|
| - // Increase the number of consecutive house numbers since the beginning.
|
| - if (consecutive_house_numbers) {
|
| - // Check if there is a new line between consecutive house numbers.
|
| - // This avoids false positives of the form "Cafe 21\n 750 Fifth Ave.."
|
| - if (num_lines > 1) {
|
| - next_house_number_word = next_word;
|
| - break;
|
| - }
|
| - }
|
| -
|
| - // Keep the next candidate to resume parsing from in case of failure.
|
| - if (next_house_number_word == 0) {
|
| - next_house_number_word = next_word;
|
| - continue;
|
| - }
|
| - } else {
|
| - consecutive_house_numbers = false;
|
| - }
|
| -
|
| - // Look for location names in the words after the house number.
|
| - // A range limitation is introduced to avoid matching
|
| - // anything that starts with a number before a legitimate address.
|
| - if (next_word <= kMaxLocationNameDistance &&
|
| - IsValidLocationName(current_word)) {
|
| - found_location_name = true;
|
| - continue;
|
| - }
|
| -
|
| - // Don't count the house number.
|
| - if (next_word > kMinAddressWords) {
|
| - // Looking for the state is likely to add new words to the list while
|
| - // checking for multi-word state names.
|
| - size_t state_first_word = next_word;
|
| - size_t state_last_word, state_index;
|
| - if (FindStateStartingInWord(&words, state_first_word, &state_last_word,
|
| - &tokenizer, &state_index)) {
|
| - // A location name should have been found at this point.
|
| - if (!found_location_name)
|
| - break;
|
| -
|
| - // Explicitly exclude "et al", as "al" is a valid state code.
|
| - if (current_word_length == 2 && words.size() > 2) {
|
| - const Word& previous_word = words[state_first_word - 1];
|
| - if (previous_word.end - previous_word.begin == 2 &&
|
| - base::LowerCaseEqualsASCII(
|
| - base::StringPiece16(previous_word.begin, previous_word.end),
|
| - "et") &&
|
| - base::LowerCaseEqualsASCII(
|
| - base::StringPiece16(current_word.begin, current_word.end),
|
| - "al"))
|
| - break;
|
| - }
|
| -
|
| - // Extract one more word from the tokenizer if not already available.
|
| - size_t zip_word = state_last_word + 1;
|
| - if (zip_word == words.size()) {
|
| - do {
|
| - if (!tokenizer.GetNext()) {
|
| - // The address ends with a state name without a zip code. This
|
| - // is legal according to WebView#findAddress public
|
| - // documentation.
|
| - *start_pos = words[0].begin - begin;
|
| - *end_pos = words[state_last_word].end - begin;
|
| - return true;
|
| - }
|
| - } while (tokenizer.token_is_delim());
|
| - words.push_back(
|
| - Word(tokenizer.token_begin(), tokenizer.token_end()));
|
| - }
|
| -
|
| - // Check the parsing validity and state range of the zip code.
|
| - next_word = state_last_word;
|
| - if (!IsZipValid(words[zip_word], state_index))
|
| - continue;
|
| -
|
| - *start_pos = words[0].begin - begin;
|
| - *end_pos = words[zip_word].end - begin;
|
| - return true;
|
| - }
|
| - }
|
| - }
|
| -
|
| - // Avoid skipping too many words because of a non-address number
|
| - // at the beginning of the contents to parse.
|
| - if (continue_on_house_number && next_house_number_word > 0) {
|
| - it = words[next_house_number_word].begin;
|
| - } else {
|
| - DCHECK(!words.empty());
|
| - next_word = std::min(next_word, words.size() - 1);
|
| - it = words[next_word].end;
|
| - }
|
| - }
|
| -
|
| - return false;
|
| -}
|
| -
|
| -} // namespace address_parser
|
| -
|
| -} // namespace android_webview
|
|
|