Index: content/common/android/address_parser.cc |
diff --git a/content/common/android/address_parser.cc b/content/common/android/address_parser.cc |
deleted file mode 100644 |
index bed843d160c1dcb3cc60071c79e0d00f81eec292..0000000000000000000000000000000000000000 |
--- a/content/common/android/address_parser.cc |
+++ /dev/null |
@@ -1,228 +0,0 @@ |
-// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "content/common/android/address_parser.h" |
- |
-#include "base/logging.h" |
-#include "base/strings/string_util.h" |
-#include "content/common/android/address_parser_internal.h" |
- |
-namespace { |
- |
-// Lower bound on the number of words that must follow the house number |
-// before the parser starts looking for a state. Raising this value risks |
-// missing short addresses. |
-const size_t kMinAddressWords = 3; |
- |
-// Upper bound on the number of words scanned between the house number and |
-// the state (neither endpoint is counted). |
-const size_t kMaxAddressWords = 12; |
- |
-// Upper bound on the number of lines spanned between the house number and |
-// the state (neither endpoint is counted). |
-const size_t kMaxAddressLines = 5; |
- |
-// Longest word tolerated between the house number and the state (neither |
-// endpoint is counted). |
-const size_t kMaxAddressNameWordLength = 25; |
- |
-// The location name has to appear within this many words after the house |
-// number. |
-const size_t kMaxLocationNameDistance = 4; |
- |
-// Characters that, in addition to whitespace, separate words and count as |
-// line breaks. The trailing 0 terminates the array so that it can be used |
-// to initialize a base::string16. |
-const base::char16 kNewlineDelimiters[] = { |
-  '\n', ',', '*', |
-  0x2022,  // Unicode bullet |
-  0, |
-}; |
- |
-}  // anonymous namespace |
- |
-namespace content { |
- |
-namespace address_parser { |
- |
-using namespace internal; |
- |
-// Convenience overload: runs the iterator-based FindAddress() over the whole |
-// of |text| and, on a match, copies the matched substring into |address|. |
-// Returns false, leaving |address| untouched, when nothing matches. |
-bool FindAddress(const base::string16& text, base::string16* address) { |
-  size_t match_begin = 0; |
-  size_t match_end = 0; |
-  if (!FindAddress(text.begin(), text.end(), &match_begin, &match_end)) |
-    return false; |
- |
-  // Guard against an inverted range by treating it as an empty match. |
-  size_t match_length = 0; |
-  if (match_end >= match_begin) |
-    match_length = match_end - match_begin; |
- |
-  address->assign(text.substr(match_begin, match_length)); |
-  return true; |
-} |
- |
-// Searches [begin, end) for text shaped like a street address: a house |
-// number, a location name within the next few words, a state and, when |
-// another word follows the state, a zip code accepted for that state. |
-// |
-// On success returns true and stores in |start_pos| and |end_pos| the |
-// offsets, relative to |begin|, of the start of the house number and of the |
-// end of the last matched word (the zip code, or the state when no further |
-// word follows it). Returns false when nothing matches; the output |
-// parameters are only written on success. |
-bool FindAddress(const base::string16::const_iterator& begin, |
-                 const base::string16::const_iterator& end, |
-                 size_t* start_pos, |
-                 size_t* end_pos) { |
-  HouseNumberParser house_number_parser; |
- |
-  // Keep going through the input string until a potential house number is |
-  // detected. Start tokenizing the following words to find a valid |
-  // street name within a word range. Then, find a state name followed |
-  // by a valid zip code for that state. Also keep a lookout for any other |
-  // possible house numbers to continue from in case of no match and for |
-  // state names not followed by a zip code (e.g. New York, NY 10000). |
-  const base::string16 newline_delimiters = kNewlineDelimiters; |
-  const base::string16 delimiters = base::kWhitespaceUTF16 + newline_delimiters; |
-  for (base::string16::const_iterator it = begin; it != end; ) { |
-    Word house_number; |
-    if (!house_number_parser.Parse(it, end, &house_number)) |
-      return false; |
- |
-    // Delimiters are returned as tokens so that line breaks can be counted. |
-    String16Tokenizer tokenizer(house_number.end, end, delimiters); |
-    tokenizer.set_options(String16Tokenizer::RETURN_DELIMS); |
- |
-    // |words[0]| is always the house number of the current candidate. |
-    WordList words; |
-    words.push_back(house_number); |
- |
-    bool found_location_name = false; |
-    bool continue_on_house_number = true; |
-    bool consecutive_house_numbers = true; |
-    size_t next_house_number_word = 0; |
-    size_t num_lines = 1; |
- |
-    // Don't include the house number in the word count. |
-    size_t next_word = 1; |
-    for (; next_word <= kMaxAddressWords + 1; ++next_word) { |
-      // Extract a new word from the tokenizer, unless it was already added |
-      // to |words| by an earlier look-ahead. |
-      if (next_word == words.size()) { |
-        do { |
-          if (!tokenizer.GetNext()) |
-            return false; |
- |
-          // Check the number of address lines. |
-          if (tokenizer.token_is_delim() && newline_delimiters.find( |
-              *tokenizer.token_begin()) != base::string16::npos) { |
-            ++num_lines; |
-          } |
-        } while (tokenizer.token_is_delim()); |
- |
-        if (num_lines > kMaxAddressLines) |
-          break; |
- |
-        words.push_back(Word(tokenizer.token_begin(), tokenizer.token_end())); |
-      } |
- |
-      // Check the word length. If too long, don't try to continue from |
-      // the next house number as no address can hold this word. |
-      // Keep a copy rather than a reference into |words|: the state lookup |
-      // below may append to |words|, and |current_word| is still read after |
-      // that call, so a reference could be left dangling. |
-      const Word current_word = words[next_word]; |
-      DCHECK_GT(std::distance(current_word.begin, current_word.end), 0); |
-      size_t current_word_length = std::distance( |
-          current_word.begin, current_word.end); |
-      if (current_word_length > kMaxAddressNameWordLength) { |
-        continue_on_house_number = false; |
-        break; |
-      } |
- |
-      // Check if the new word is a valid house number. |
-      if (house_number_parser.Parse(current_word.begin, current_word.end, |
-          NULL)) { |
-        // Increase the number of consecutive house numbers since the beginning. |
-        if (consecutive_house_numbers) { |
-          // Check if there is a new line between consecutive house numbers. |
-          // This avoids false positives of the form "Cafe 21\n 750 Fifth Ave.." |
-          if (num_lines > 1) { |
-            next_house_number_word = next_word; |
-            break; |
-          } |
-        } |
- |
-        // Keep the next candidate to resume parsing from in case of failure. |
-        if (next_house_number_word == 0) { |
-          next_house_number_word = next_word; |
-          continue; |
-        } |
-      } else { |
-        consecutive_house_numbers = false; |
-      } |
- |
-      // Look for location names in the words after the house number. |
-      // A range limitation is introduced to avoid matching |
-      // anything that starts with a number before a legitimate address. |
-      if (next_word <= kMaxLocationNameDistance && |
-          IsValidLocationName(current_word)) { |
-        found_location_name = true; |
-        continue; |
-      } |
- |
-      // Don't count the house number. |
-      if (next_word > kMinAddressWords) { |
-        // Looking for the state is likely to add new words to the list while |
-        // checking for multi-word state names. |
-        size_t state_first_word = next_word; |
-        size_t state_last_word, state_index; |
-        if (FindStateStartingInWord(&words, state_first_word, &state_last_word, |
-                                    &tokenizer, &state_index)) { |
- |
-          // A location name should have been found at this point. |
-          if (!found_location_name) |
-            break; |
- |
-          // Explicitly exclude "et al", as "al" is a valid state code. |
-          if (current_word_length == 2 && words.size() > 2) { |
-            const Word& previous_word = words[state_first_word - 1]; |
-            if (previous_word.end - previous_word.begin == 2 && |
-                base::LowerCaseEqualsASCII( |
-                    base::StringPiece16(previous_word.begin, previous_word.end), |
-                    "et") && |
-                base::LowerCaseEqualsASCII( |
-                    base::StringPiece16(current_word.begin, current_word.end), |
-                    "al")) |
-              break; |
-          } |
- |
-          // Extract one more word from the tokenizer if not already available. |
-          size_t zip_word = state_last_word + 1; |
-          if (zip_word == words.size()) { |
-            do { |
-              if (!tokenizer.GetNext()) { |
-                // The address ends with a state name without a zip code. This |
-                // is legal according to WebView#findAddress public |
-                // documentation. |
-                *start_pos = words[0].begin - begin; |
-                *end_pos = words[state_last_word].end - begin; |
-                return true; |
-              } |
-            } while (tokenizer.token_is_delim()); |
-            words.push_back(Word(tokenizer.token_begin(), |
-                                 tokenizer.token_end())); |
-          } |
- |
-          // Check the parsing validity and state range of the zip code. |
-          // On failure the loop increment resumes the scan at |zip_word|. |
-          next_word = state_last_word; |
-          if (!IsZipValid(words[zip_word], state_index)) |
-            continue; |
- |
-          *start_pos = words[0].begin - begin; |
-          *end_pos = words[zip_word].end - begin; |
-          return true; |
-        } |
-      } |
-    } |
- |
-    // Avoid skipping too many words because of a non-address number |
-    // at the beginning of the contents to parse. |
-    if (continue_on_house_number && next_house_number_word > 0) { |
-      it = words[next_house_number_word].begin; |
-    } else { |
-      DCHECK(!words.empty()); |
-      // |next_word| may be one past the last collected word when the inner |
-      // loop ran to completion or stopped before pushing a word. |
-      next_word = std::min(next_word, words.size() - 1); |
-      it = words[next_word].end; |
-    } |
-  } |
- |
-  return false; |
-} |
- |
-} // namespace address_parser |
- |
-} // namespace content |