Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(107)

Unified Diff: components/autofill/core/browser/address_field.cc

Issue 1453193002: autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: 14 tests failing Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/autofill/core/browser/address_field.cc
diff --git a/components/autofill/core/browser/address_field.cc b/components/autofill/core/browser/address_field.cc
index bf96dc2e75860ca64d95a8be1e20ca9be5360e7a..ce1ca7e2d19284203f399b7f4ed118ff05972127 100644
--- a/components/autofill/core/browser/address_field.cc
+++ b/components/autofill/core/browser/address_field.cc
@@ -16,6 +16,7 @@
#include "components/autofill/core/browser/autofill_scanner.h"
#include "components/autofill/core/browser/field_types.h"
+using base::ASCIIToUTF16;
using base::UTF8ToUTF16;
Ilya Sherman 2015/11/26 02:25:09 Are these needed?
tfarina 2015/11/26 14:22:27 Done.
namespace autofill {
@@ -48,9 +49,6 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) {
const AutofillField* const initial_field = scanner->Cursor();
size_t saved_cursor = scanner->SaveCursor();
- base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe);
- base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe);
-
// Allow address fields to appear in any order.
size_t begin_trailing_non_labeled_fields = 0;
bool has_trailing_non_labeled_fields = false;
@@ -62,8 +60,8 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) {
address_field->ParseCompany(scanner)) {
has_trailing_non_labeled_fields = false;
continue;
- } else if (ParseField(scanner, attention_ignored, NULL) ||
- ParseField(scanner, region_ignored, NULL)) {
+ } else if (ParseField(scanner, kAttentionIgnoredRe, NULL) ||
+ ParseField(scanner, kRegionIgnoredRe, NULL)) {
// We ignore the following:
// * Attention.
// * Province/Region/Other.
@@ -148,7 +146,7 @@ bool AddressField::ParseCompany(AutofillScanner* scanner) {
if (company_ && !company_->IsEmpty())
return false;
- return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_);
+ return ParseField(scanner, kCompanyRe, &company_);
}
bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
@@ -164,19 +162,17 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
return false;
// Ignore "Address Lookup" field. http://crbug.com/427622
- if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL))
+ if (ParseField(scanner, kAddressLookupRe, NULL))
return false;
- base::string16 pattern = UTF8ToUTF16(kAddressLine1Re);
- base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe);
- if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) &&
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
+ if (!ParseFieldSpecifics(scanner, kAddressLine1Re, MATCH_DEFAULT,
&address1_) &&
- !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA,
- &street_address_) &&
- !ParseFieldSpecifics(scanner, label_pattern,
- MATCH_LABEL | MATCH_TEXT_AREA,
- &street_address_))
+ !ParseFieldSpecifics(scanner, kAddressLine1LabelRe,
+ MATCH_LABEL | MATCH_TEXT, &address1_) &&
+ !ParseFieldSpecifics(scanner, kAddressLine1Re,
+ MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) &&
+ !ParseFieldSpecifics(scanner, kAddressLine1LabelRe,
+ MATCH_LABEL | MATCH_TEXT_AREA, &street_address_))
return false;
if (street_address_)
@@ -185,19 +181,16 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
// This code may not pick up pages that have an address field consisting of a
// sequence of unlabeled address fields. If we need to add this, see
// discussion on https://codereview.chromium.org/741493003/
- pattern = UTF8ToUTF16(kAddressLine2Re);
- label_pattern = UTF8ToUTF16(kAddressLine2LabelRe);
- if (!ParseField(scanner, pattern, &address2_) &&
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
- &address2_))
+ if (!ParseField(scanner, kAddressLine2Re, &address2_) &&
+ !ParseFieldSpecifics(scanner, kAddressLine2LabelRe,
+ MATCH_LABEL | MATCH_TEXT, &address2_))
return true;
// Optionally parse address line 3. This uses the same label regexp as
// address 2 above.
- pattern = UTF8ToUTF16(kAddressLinesExtraRe);
- if (!ParseField(scanner, pattern, &address3_) &&
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
- &address3_))
+ if (!ParseField(scanner, kAddressLinesExtraRe, &address3_) &&
+ !ParseFieldSpecifics(scanner, kAddressLinesExtraRe,
Ilya Sherman 2015/11/26 02:25:09 Please use kAddressLine2LabelRe here, as the code
tfarina 2015/11/26 14:22:27 ops, that is what happens when you try to make pat
+ MATCH_LABEL | MATCH_TEXT, &address3_))
return true;
// Try for surplus lines, which we will promptly discard. Some pages have 4
@@ -205,8 +198,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
//
// Since these are rare, don't bother considering unlabeled lines as extra
// address lines.
- pattern = UTF8ToUTF16(kAddressLinesExtraRe);
- while (ParseField(scanner, pattern, NULL)) {
+ while (ParseField(scanner, kAddressLinesExtraRe, NULL)) {
// Consumed a surplus line, try for another.
}
return true;
@@ -217,9 +209,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) {
return false;
scanner->SaveCursor();
- if (ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCountryRe),
- MATCH_DEFAULT | MATCH_SELECT,
+ if (ParseFieldSpecifics(scanner, kCountryRe, MATCH_DEFAULT | MATCH_SELECT,
&country_)) {
return true;
}
@@ -227,8 +217,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) {
// The occasional page (e.g. google account registration page) calls this a
// "location". However, this only makes sense for select tags.
scanner->Rewind();
- return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCountryLocationRe),
+ return ParseFieldSpecifics(scanner, kCountryLocationRe,
MATCH_LABEL | MATCH_NAME | MATCH_SELECT,
&country_);
}
@@ -237,16 +226,13 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner) {
if (zip_)
return false;
- if (!ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kZipCodeRe),
- kZipCodeMatchType,
- &zip_)) {
+ if (!ParseFieldSpecifics(scanner, kZipCodeRe, kZipCodeMatchType, &zip_)) {
return false;
}
// Look for a zip+4, whose field name will also often contain
// the substring "zip".
- ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_);
+ ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_);
return true;
}
@@ -254,20 +240,20 @@ bool AddressField::ParseCity(AutofillScanner* scanner) {
if (city_)
return false;
- return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCityRe),
- kCityMatchType,
- &city_);
+ return ParseFieldSpecifics(scanner, kCityRe, kCityMatchType, &city_);
}
bool AddressField::ParseState(AutofillScanner* scanner) {
if (state_)
return false;
- return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kStateRe),
- kStateMatchType,
- &state_);
+ size_t saved_cursor = scanner->SaveCursor();
Ilya Sherman 2015/11/26 02:25:09 Please add a comment above this block of code like
tfarina 2015/11/26 14:22:27 Done.
+ if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) {
+ scanner->RewindTo(saved_cursor);
+ return false;
+ }
+
+ return ParseFieldSpecifics(scanner, kStateRe, kStateMatchType, &state_);
}
bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) {
@@ -329,7 +315,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
return RESULT_MATCH_NONE;
ParseNameLabelResult result = ParseNameAndLabelSeparately(
- scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_);
+ scanner, kZipCodeRe, kZipCodeMatchType, &zip_);
if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd())
return result;
@@ -349,10 +335,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
if (!found_non_zip4) {
// Look for a zip+4, whose field name will also often contain
// the substring "zip".
- ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kZip4Re),
- kZipCodeMatchType,
- &zip4_);
+ ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_);
}
return result;
}
@@ -362,8 +345,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity(
if (city_)
return RESULT_MATCH_NONE;
- return ParseNameAndLabelSeparately(
- scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_);
+ return ParseNameAndLabelSeparately(scanner, kCityRe, kCityMatchType, &city_);
}
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState(
@@ -371,8 +353,16 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState(
if (state_)
return RESULT_MATCH_NONE;
- return ParseNameAndLabelSeparately(
- scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_);
+ size_t saved_cursor = scanner->SaveCursor();
+ ParseNameLabelResult result = ParseNameAndLabelSeparately(
+ scanner, "United States", kStateMatchType, nullptr);
+
+ if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd())
+ return result;
+ scanner->RewindTo(saved_cursor);
Ilya Sherman 2015/11/26 02:25:09 This logic is not correct. I still think it shoul
tfarina 2015/11/26 14:22:27 Done.
+
+ return ParseNameAndLabelSeparately(scanner, kStateRe, kStateMatchType,
+ &state_);
}
} // namespace autofill

Powered by Google App Engine
This is Rietveld 408576698