Index: components/autofill/core/browser/address_field.cc |
diff --git a/components/autofill/core/browser/address_field.cc b/components/autofill/core/browser/address_field.cc |
index bf96dc2e75860ca64d95a8be1e20ca9be5360e7a..b9948dbcdda7a76b668afd1348e20114cccf44cb 100644 |
--- a/components/autofill/core/browser/address_field.cc |
+++ b/components/autofill/core/browser/address_field.cc |
@@ -8,16 +8,12 @@ |
#include "base/logging.h" |
#include "base/memory/scoped_ptr.h" |
-#include "base/strings/string16.h" |
#include "base/strings/string_util.h" |
-#include "base/strings/utf_string_conversions.h" |
#include "components/autofill/core/browser/autofill_field.h" |
#include "components/autofill/core/browser/autofill_regex_constants.h" |
#include "components/autofill/core/browser/autofill_scanner.h" |
#include "components/autofill/core/browser/field_types.h" |
-using base::UTF8ToUTF16; |
- |
namespace autofill { |
namespace { |
@@ -48,9 +44,6 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
const AutofillField* const initial_field = scanner->Cursor(); |
size_t saved_cursor = scanner->SaveCursor(); |
- base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
- base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
- |
// Allow address fields to appear in any order. |
size_t begin_trailing_non_labeled_fields = 0; |
bool has_trailing_non_labeled_fields = false; |
@@ -62,8 +55,8 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
address_field->ParseCompany(scanner)) { |
has_trailing_non_labeled_fields = false; |
continue; |
- } else if (ParseField(scanner, attention_ignored, NULL) || |
- ParseField(scanner, region_ignored, NULL)) { |
+ } else if (ParseField(scanner, kAttentionIgnoredRe, NULL) || |
+ ParseField(scanner, kRegionIgnoredRe, NULL)) { |
// We ignore the following: |
// * Attention. |
// * Province/Region/Other. |
@@ -148,7 +141,7 @@ bool AddressField::ParseCompany(AutofillScanner* scanner) { |
if (company_ && !company_->IsEmpty()) |
return false; |
- return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_); |
+ return ParseField(scanner, kCompanyRe, &company_); |
} |
bool AddressField::ParseAddressLines(AutofillScanner* scanner) { |
@@ -164,19 +157,17 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { |
return false; |
// Ignore "Address Lookup" field. http://crbug.com/427622 |
- if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL)) |
+ if (ParseField(scanner, kAddressLookupRe, NULL)) |
return false; |
- base::string16 pattern = UTF8ToUTF16(kAddressLine1Re); |
- base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe); |
- if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) && |
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
+ if (!ParseFieldSpecifics(scanner, kAddressLine1Re, MATCH_DEFAULT, |
&address1_) && |
- !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA, |
- &street_address_) && |
- !ParseFieldSpecifics(scanner, label_pattern, |
- MATCH_LABEL | MATCH_TEXT_AREA, |
- &street_address_)) |
+ !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, |
+ MATCH_LABEL | MATCH_TEXT, &address1_) && |
+ !ParseFieldSpecifics(scanner, kAddressLine1Re, |
+ MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) && |
+ !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, |
+ MATCH_LABEL | MATCH_TEXT_AREA, &street_address_)) |
return false; |
if (street_address_) |
@@ -185,19 +176,16 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { |
// This code may not pick up pages that have an address field consisting of a |
// sequence of unlabeled address fields. If we need to add this, see |
// discussion on https://codereview.chromium.org/741493003/ |
- pattern = UTF8ToUTF16(kAddressLine2Re); |
- label_pattern = UTF8ToUTF16(kAddressLine2LabelRe); |
- if (!ParseField(scanner, pattern, &address2_) && |
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
- &address2_)) |
+ if (!ParseField(scanner, kAddressLine2Re, &address2_) && |
+ !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, |
+ MATCH_LABEL | MATCH_TEXT, &address2_)) |
return true; |
// Optionally parse address line 3. This uses the same label regexp as |
// address 2 above. |
- pattern = UTF8ToUTF16(kAddressLinesExtraRe); |
- if (!ParseField(scanner, pattern, &address3_) && |
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
- &address3_)) |
+ if (!ParseField(scanner, kAddressLinesExtraRe, &address3_) && |
+ !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, |
+ MATCH_LABEL | MATCH_TEXT, &address3_)) |
return true; |
// Try for surplus lines, which we will promptly discard. Some pages have 4 |
@@ -205,8 +193,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { |
// |
// Since these are rare, don't bother considering unlabeled lines as extra |
// address lines. |
- pattern = UTF8ToUTF16(kAddressLinesExtraRe); |
- while (ParseField(scanner, pattern, NULL)) { |
+ while (ParseField(scanner, kAddressLinesExtraRe, NULL)) { |
// Consumed a surplus line, try for another. |
} |
return true; |
@@ -217,9 +204,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) { |
return false; |
scanner->SaveCursor(); |
- if (ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kCountryRe), |
- MATCH_DEFAULT | MATCH_SELECT, |
+ if (ParseFieldSpecifics(scanner, kCountryRe, MATCH_DEFAULT | MATCH_SELECT, |
&country_)) { |
return true; |
} |
@@ -227,8 +212,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) { |
// The occasional page (e.g. google account registration page) calls this a |
// "location". However, this only makes sense for select tags. |
scanner->Rewind(); |
- return ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kCountryLocationRe), |
+ return ParseFieldSpecifics(scanner, kCountryLocationRe, |
MATCH_LABEL | MATCH_NAME | MATCH_SELECT, |
&country_); |
} |
@@ -237,16 +221,13 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner) { |
if (zip_) |
return false; |
- if (!ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kZipCodeRe), |
- kZipCodeMatchType, |
- &zip_)) { |
+ if (!ParseFieldSpecifics(scanner, kZipCodeRe, kZipCodeMatchType, &zip_)) { |
return false; |
} |
// Look for a zip+4, whose field name will also often contain |
// the substring "zip". |
- ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); |
+ ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); |
return true; |
} |
@@ -254,20 +235,21 @@ bool AddressField::ParseCity(AutofillScanner* scanner) { |
if (city_) |
return false; |
- return ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kCityRe), |
- kCityMatchType, |
- &city_); |
+ return ParseFieldSpecifics(scanner, kCityRe, kCityMatchType, &city_); |
} |
bool AddressField::ParseState(AutofillScanner* scanner) { |
if (state_) |
return false; |
- return ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kStateRe), |
- kStateMatchType, |
- &state_); |
+ // Ignore spurious matches for "United States". |
Evan Stade
2015/12/08 19:34:56
where is this coming from? is this a new addition?
Ilya Sherman
2015/12/08 20:02:28
It's coming from a change to one of the regexes, w
|
+ size_t saved_cursor = scanner->SaveCursor(); |
+ if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { |
+ scanner->RewindTo(saved_cursor); |
+ return false; |
+ } |
+ |
+ return ParseFieldSpecifics(scanner, kStateRe, kStateMatchType, &state_); |
} |
bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { |
@@ -329,7 +311,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( |
return RESULT_MATCH_NONE; |
ParseNameLabelResult result = ParseNameAndLabelSeparately( |
- scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); |
+ scanner, kZipCodeRe, kZipCodeMatchType, &zip_); |
if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) |
return result; |
@@ -349,10 +331,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( |
if (!found_non_zip4) { |
// Look for a zip+4, whose field name will also often contain |
// the substring "zip". |
- ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kZip4Re), |
- kZipCodeMatchType, |
- &zip4_); |
+ ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); |
} |
return result; |
} |
@@ -362,8 +341,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( |
if (city_) |
return RESULT_MATCH_NONE; |
- return ParseNameAndLabelSeparately( |
- scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); |
+ return ParseNameAndLabelSeparately(scanner, kCityRe, kCityMatchType, &city_); |
} |
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( |
@@ -371,8 +349,14 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( |
if (state_) |
return RESULT_MATCH_NONE; |
- return ParseNameAndLabelSeparately( |
- scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); |
+ size_t saved_cursor = scanner->SaveCursor(); |
+ if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { |
+ scanner->RewindTo(saved_cursor); |
+ return RESULT_MATCH_NONE; |
+ } |
+ |
+ return ParseNameAndLabelSeparately(scanner, kStateRe, kStateMatchType, |
+ &state_); |
} |
} // namespace autofill |