 Chromium Code Reviews
 Chromium Code Reviews Issue 1453193002:
  autofill: switch autofill_regexes to RE2 library  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src.git@master
    
  
    Issue 1453193002:
  autofill: switch autofill_regexes to RE2 library  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src.git@master| Index: components/autofill/core/browser/address_field.cc | 
| diff --git a/components/autofill/core/browser/address_field.cc b/components/autofill/core/browser/address_field.cc | 
| index bf96dc2e75860ca64d95a8be1e20ca9be5360e7a..ce1ca7e2d19284203f399b7f4ed118ff05972127 100644 | 
| --- a/components/autofill/core/browser/address_field.cc | 
| +++ b/components/autofill/core/browser/address_field.cc | 
| @@ -16,6 +16,7 @@ | 
| #include "components/autofill/core/browser/autofill_scanner.h" | 
| #include "components/autofill/core/browser/field_types.h" | 
| +using base::ASCIIToUTF16; | 
| using base::UTF8ToUTF16; | 
| 
Ilya Sherman
2015/11/26 02:25:09
Are these needed?
 
tfarina
2015/11/26 14:22:27
Done.
 | 
| namespace autofill { | 
| @@ -48,9 +49,6 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 
| const AutofillField* const initial_field = scanner->Cursor(); | 
| size_t saved_cursor = scanner->SaveCursor(); | 
| - base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); | 
| - base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); | 
| - | 
| // Allow address fields to appear in any order. | 
| size_t begin_trailing_non_labeled_fields = 0; | 
| bool has_trailing_non_labeled_fields = false; | 
| @@ -62,8 +60,8 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 
| address_field->ParseCompany(scanner)) { | 
| has_trailing_non_labeled_fields = false; | 
| continue; | 
| - } else if (ParseField(scanner, attention_ignored, NULL) || | 
| - ParseField(scanner, region_ignored, NULL)) { | 
| + } else if (ParseField(scanner, kAttentionIgnoredRe, NULL) || | 
| + ParseField(scanner, kRegionIgnoredRe, NULL)) { | 
| // We ignore the following: | 
| // * Attention. | 
| // * Province/Region/Other. | 
| @@ -148,7 +146,7 @@ bool AddressField::ParseCompany(AutofillScanner* scanner) { | 
| if (company_ && !company_->IsEmpty()) | 
| return false; | 
| - return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_); | 
| + return ParseField(scanner, kCompanyRe, &company_); | 
| } | 
| bool AddressField::ParseAddressLines(AutofillScanner* scanner) { | 
| @@ -164,19 +162,17 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { | 
| return false; | 
| // Ignore "Address Lookup" field. http://crbug.com/427622 | 
| - if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL)) | 
| + if (ParseField(scanner, kAddressLookupRe, NULL)) | 
| return false; | 
| - base::string16 pattern = UTF8ToUTF16(kAddressLine1Re); | 
| - base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe); | 
| - if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) && | 
| - !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | 
| + if (!ParseFieldSpecifics(scanner, kAddressLine1Re, MATCH_DEFAULT, | 
| &address1_) && | 
| - !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA, | 
| - &street_address_) && | 
| - !ParseFieldSpecifics(scanner, label_pattern, | 
| - MATCH_LABEL | MATCH_TEXT_AREA, | 
| - &street_address_)) | 
| + !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, | 
| + MATCH_LABEL | MATCH_TEXT, &address1_) && | 
| + !ParseFieldSpecifics(scanner, kAddressLine1Re, | 
| + MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) && | 
| + !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, | 
| + MATCH_LABEL | MATCH_TEXT_AREA, &street_address_)) | 
| return false; | 
| if (street_address_) | 
| @@ -185,19 +181,16 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { | 
| // This code may not pick up pages that have an address field consisting of a | 
| // sequence of unlabeled address fields. If we need to add this, see | 
| // discussion on https://codereview.chromium.org/741493003/ | 
| - pattern = UTF8ToUTF16(kAddressLine2Re); | 
| - label_pattern = UTF8ToUTF16(kAddressLine2LabelRe); | 
| - if (!ParseField(scanner, pattern, &address2_) && | 
| - !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | 
| - &address2_)) | 
| + if (!ParseField(scanner, kAddressLine2Re, &address2_) && | 
| + !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, | 
| + MATCH_LABEL | MATCH_TEXT, &address2_)) | 
| return true; | 
| // Optionally parse address line 3. This uses the same label regexp as | 
| // address 2 above. | 
| - pattern = UTF8ToUTF16(kAddressLinesExtraRe); | 
| - if (!ParseField(scanner, pattern, &address3_) && | 
| - !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | 
| - &address3_)) | 
| + if (!ParseField(scanner, kAddressLinesExtraRe, &address3_) && | 
| + !ParseFieldSpecifics(scanner, kAddressLinesExtraRe, | 
| 
Ilya Sherman
2015/11/26 02:25:09
Please use kAddressLine2LabelRe here, as the code
 
tfarina
2015/11/26 14:22:27
ops, that is what happens when you try to make pat
 | 
| + MATCH_LABEL | MATCH_TEXT, &address3_)) | 
| return true; | 
| // Try for surplus lines, which we will promptly discard. Some pages have 4 | 
| @@ -205,8 +198,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { | 
| // | 
| // Since these are rare, don't bother considering unlabeled lines as extra | 
| // address lines. | 
| - pattern = UTF8ToUTF16(kAddressLinesExtraRe); | 
| - while (ParseField(scanner, pattern, NULL)) { | 
| + while (ParseField(scanner, kAddressLinesExtraRe, NULL)) { | 
| // Consumed a surplus line, try for another. | 
| } | 
| return true; | 
| @@ -217,9 +209,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) { | 
| return false; | 
| scanner->SaveCursor(); | 
| - if (ParseFieldSpecifics(scanner, | 
| - UTF8ToUTF16(kCountryRe), | 
| - MATCH_DEFAULT | MATCH_SELECT, | 
| + if (ParseFieldSpecifics(scanner, kCountryRe, MATCH_DEFAULT | MATCH_SELECT, | 
| &country_)) { | 
| return true; | 
| } | 
| @@ -227,8 +217,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) { | 
| // The occasional page (e.g. google account registration page) calls this a | 
| // "location". However, this only makes sense for select tags. | 
| scanner->Rewind(); | 
| - return ParseFieldSpecifics(scanner, | 
| - UTF8ToUTF16(kCountryLocationRe), | 
| + return ParseFieldSpecifics(scanner, kCountryLocationRe, | 
| MATCH_LABEL | MATCH_NAME | MATCH_SELECT, | 
| &country_); | 
| } | 
| @@ -237,16 +226,13 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner) { | 
| if (zip_) | 
| return false; | 
| - if (!ParseFieldSpecifics(scanner, | 
| - UTF8ToUTF16(kZipCodeRe), | 
| - kZipCodeMatchType, | 
| - &zip_)) { | 
| + if (!ParseFieldSpecifics(scanner, kZipCodeRe, kZipCodeMatchType, &zip_)) { | 
| return false; | 
| } | 
| // Look for a zip+4, whose field name will also often contain | 
| // the substring "zip". | 
| - ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); | 
| + ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); | 
| return true; | 
| } | 
| @@ -254,20 +240,20 @@ bool AddressField::ParseCity(AutofillScanner* scanner) { | 
| if (city_) | 
| return false; | 
| - return ParseFieldSpecifics(scanner, | 
| - UTF8ToUTF16(kCityRe), | 
| - kCityMatchType, | 
| - &city_); | 
| + return ParseFieldSpecifics(scanner, kCityRe, kCityMatchType, &city_); | 
| } | 
| bool AddressField::ParseState(AutofillScanner* scanner) { | 
| if (state_) | 
| return false; | 
| - return ParseFieldSpecifics(scanner, | 
| - UTF8ToUTF16(kStateRe), | 
| - kStateMatchType, | 
| - &state_); | 
| + size_t saved_cursor = scanner->SaveCursor(); | 
| 
Ilya Sherman
2015/11/26 02:25:09
Please add a comment above this block of code like
 
tfarina
2015/11/26 14:22:27
Done.
 | 
| + if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { | 
| + scanner->RewindTo(saved_cursor); | 
| + return false; | 
| + } | 
| + | 
| + return ParseFieldSpecifics(scanner, kStateRe, kStateMatchType, &state_); | 
| } | 
| bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { | 
| @@ -329,7 +315,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( | 
| return RESULT_MATCH_NONE; | 
| ParseNameLabelResult result = ParseNameAndLabelSeparately( | 
| - scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); | 
| + scanner, kZipCodeRe, kZipCodeMatchType, &zip_); | 
| if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) | 
| return result; | 
| @@ -349,10 +335,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( | 
| if (!found_non_zip4) { | 
| // Look for a zip+4, whose field name will also often contain | 
| // the substring "zip". | 
| - ParseFieldSpecifics(scanner, | 
| - UTF8ToUTF16(kZip4Re), | 
| - kZipCodeMatchType, | 
| - &zip4_); | 
| + ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); | 
| } | 
| return result; | 
| } | 
| @@ -362,8 +345,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( | 
| if (city_) | 
| return RESULT_MATCH_NONE; | 
| - return ParseNameAndLabelSeparately( | 
| - scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); | 
| + return ParseNameAndLabelSeparately(scanner, kCityRe, kCityMatchType, &city_); | 
| } | 
| AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( | 
| @@ -371,8 +353,16 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( | 
| if (state_) | 
| return RESULT_MATCH_NONE; | 
| - return ParseNameAndLabelSeparately( | 
| - scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); | 
| + size_t saved_cursor = scanner->SaveCursor(); | 
| + ParseNameLabelResult result = ParseNameAndLabelSeparately( | 
| + scanner, "United States", kStateMatchType, nullptr); | 
| + | 
| + if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) | 
| + return result; | 
| + scanner->RewindTo(saved_cursor); | 
| 
Ilya Sherman
2015/11/26 02:25:09
This logic is not correct.  I still think it shoul
 
tfarina
2015/11/26 14:22:27
Done.
 | 
| + | 
| + return ParseNameAndLabelSeparately(scanner, kStateRe, kStateMatchType, | 
| + &state_); | 
| } | 
| } // namespace autofill |