| Index: chrome/browser/autofill/address_field.cc
|
| diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc
|
| index d3acc560ddb53484e907f272b176f2f6fd5ac131..7370fa097fa4f10973ef6ded4a649be19d63e7a6 100644
|
| --- a/chrome/browser/autofill/address_field.cc
|
| +++ b/chrome/browser/autofill/address_field.cc
|
| @@ -12,200 +12,11 @@
|
| #include "base/string_util.h"
|
| #include "base/utf_string_conversions.h"
|
| #include "chrome/browser/autofill/autofill_field.h"
|
| +#include "chrome/browser/autofill/autofill_regex_constants.h"
|
| #include "chrome/browser/autofill/autofill_scanner.h"
|
| #include "chrome/browser/autofill/field_types.h"
|
| #include "ui/base/l10n/l10n_util.h"
|
|
|
| -namespace {
|
| -
|
| -// The UTF-8 version of these regular expressions are in
|
| -// regular_expressions.txt.
|
| -const char kAttentionIgnoredRe[] = "attention|attn";
|
| -const char kRegionIgnoredRe[] =
|
| - "province|region|other"
|
| - // es
|
| - "|provincia"
|
| - // pt-BR, pt-PT
|
| - "|bairro|suburb";
|
| -const char kCompanyRe[] =
|
| - "company|business|organization|organisation"
|
| - // de-DE
|
| - "|firma|firmenname"
|
| - // es
|
| - "|empresa"
|
| - // fr-FR
|
| - "|societe|soci\xc3\xa9t\xc3\xa9"
|
| - // it-IT
|
| - "|ragione.?sociale"
|
| - // ja-JP
|
| - "|\xe4\xbc\x9a\xe7\xa4\xbe"
|
| - // ru
|
| - "|\xd0\xbd\xd0\xb0\xd0\xb7\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5.?\xd0"
|
| - "\xba\xd0\xbe\xd0\xbc\xd0\xbf\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb8"
|
| - // zh-CN
|
| - "|\xe5\x8d\x95\xe4\xbd\x8d|\xe5\x85\xac\xe5\x8f\xb8"
|
| - // ko-KR
|
| - "|\xed\x9a\x8c\xec\x82\xac|\xec\xa7\x81\xec\x9e\xa5";
|
| -const char kAddressLine1Re[] =
|
| - "address.*line|address1|addr1|street"
|
| - // de-DE
|
| - "|strasse|stra\xc3\x9f""e|hausnummer|housenumber"
|
| - // en-GB
|
| - "|house.?name"
|
| - // es
|
| - "|direccion|direcci\xc3\xb3n"
|
| - // fr-FR
|
| - "|adresse"
|
| - // it-IT
|
| - "|indirizzo"
|
| - // ja-JP
|
| - "|\xe4\xbd\x8f\xe6\x89\x80""1"
|
| - // pt-BR, pt-PT
|
| - "|morada|endere\xc3\xa7o"
|
| - // ru
|
| - "|\xd0\x90\xd0\xb4\xd1\x80\xd0\xb5\xd1\x81"
|
| - // zh-CN
|
| - "|\xe5\x9c\xb0\xe5\x9d\x80"
|
| - // ko-KR
|
| - "|\xec\xa3\xbc\xec\x86\x8c.?1";
|
| -const char kAddressLine1LabelRe[] =
|
| - "address"
|
| - // fr-FR
|
| - "|adresse"
|
| - // it-IT
|
| - "|indirizzo"
|
| - // ja-JP
|
| - "|\xe4\xbd\x8f\xe6\x89\x80"
|
| - // zh-CN
|
| - "|\xe5\x9c\xb0\xe5\x9d\x80"
|
| - // ko-KR
|
| - "|\xec\xa3\xbc\xec\x86\x8c";
|
| -const char kAddressLine2Re[] =
|
| - "address.*line2|address2|addr2|street|suite|unit"
|
| - // de-DE
|
| - "|adresszusatz|erg\xc3\xa4nzende.?angaben"
|
| - // es
|
| - "|direccion2|colonia|adicional"
|
| - // fr-FR
|
| - "|addresssuppl|complementnom|appartement"
|
| - // it-IT
|
| - "|indirizzo2"
|
| - // ja-JP
|
| - "|\xe4\xbd\x8f\xe6\x89\x80""2"
|
| - // pt-BR, pt-PT
|
| - "|complemento|addrcomplement"
|
| - // ru
|
| - "|\xd0\xa3\xd0\xbb\xd0\xb8\xd1\x86\xd0\xb0"
|
| - // zh-CN
|
| - "|\xe5\x9c\xb0\xe5\x9d\x80""2"
|
| - // ko-KR
|
| - "|\xec\xa3\xbc\xec\x86\x8c.?2";
|
| -const char kAddressLine2LabelRe[] =
|
| - "address"
|
| - // fr-FR
|
| - "|adresse"
|
| - // it-IT
|
| - "|indirizzo"
|
| - // zh-CN
|
| - "|\xe5\x9c\xb0\xe5\x9d\x80"
|
| - // ko-KR
|
| - "|\xec\xa3\xbc\xec\x86\x8c";
|
| -const char kAddressLine3Re[] =
|
| - "address.*line3|address3|addr3|street|line3"
|
| - // es
|
| - "|municipio"
|
| - // fr-FR
|
| - "|batiment|residence"
|
| - // it-IT
|
| - "|indirizzo3";
|
| -const char kCountryRe[] =
|
| - "country|countries|location"
|
| - // es
|
| - "|pa\xc3\xads|pais"
|
| - // ja-JP
|
| - "|\xe5\x9b\xbd"
|
| - // zh-CN
|
| - "|\xe5\x9b\xbd\xe5\xae\xb6"
|
| - // ko-KR
|
| - "|\xea\xb5\xad\xea\xb0\x80|\xeb\x82\x98\xeb\x9d\xbc";
|
| -const char kZipCodeRe[] =
|
| - "zip|postal|post.*code|pcode|^1z$"
|
| - // de-DE
|
| - "|postleitzahl"
|
| - // es
|
| - "|\\bcp\\b"
|
| - // fr-FR
|
| - "|\\bcdp\\b"
|
| - // it-IT
|
| - "|\\bcap\\b"
|
| - // ja-JP
|
| - "|\xe9\x83\xb5\xe4\xbe\xbf\xe7\x95\xaa\xe5\x8f\xb7"
|
| - // pt-BR, pt-PT
|
| - "|codigo|codpos|\\bcep\\b"
|
| - // ru
|
| - "|\xd0\x9f\xd0\xbe\xd1\x87\xd1\x82\xd0\xbe\xd0\xb2\xd1\x8b\xd0\xb9.?\xd0"
|
| - "\x98\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81"
|
| - // zh-CN
|
| - "|\xe9\x82\xae\xe6\x94\xbf\xe7\xbc\x96\xe7\xa0\x81|\xe9\x82\xae\xe7\xbc"
|
| - "\x96"
|
| - // zh-TW
|
| - "|\xe9\x83\xb5\xe9\x81\x9e\xe5\x8d\x80\xe8\x99\x9f"
|
| - // ko-KR
|
| - "|\xec\x9a\xb0\xed\x8e\xb8.?\xeb\xb2\x88\xed\x98\xb8";
|
| -const char kZip4Re[] =
|
| - "zip|^-$|post2"
|
| - // pt-BR, pt-PT
|
| - "|codpos2";
|
| -const char kCityRe[] =
|
| - "city|town"
|
| - // de-DE
|
| - "|\\bort\\b|stadt"
|
| - // en-AU
|
| - "|suburb"
|
| - // es
|
| - "|ciudad|provincia|localidad|poblacion"
|
| - // fr-FR
|
| - "|ville|commune"
|
| - // it-IT
|
| - "|localita"
|
| - // ja-JP
|
| - "|\xe5\xb8\x82\xe5\x8c\xba\xe7\x94\xba\xe6\x9d\x91"
|
| - // pt-BR, pt-PT
|
| - "|cidade"
|
| - // ru
|
| - "|\xd0\x93\xd0\xbe\xd1\x80\xd0\xbe\xd0\xb4"
|
| - // zh-CN
|
| - "|\xe5\xb8\x82"
|
| - // zh-TW
|
| - "|\xe5\x88\x86\xe5\x8d\x80"
|
| - // ko-KR
|
| - "|^\xec\x8b\x9c[^\xeb\x8f\x84\xc2\xb7\xe3\x83\xbb]|\xec\x8b\x9c[\xc2\xb7"
|
| - "\xe3\x83\xbb]?\xea\xb5\xb0[\xc2\xb7\xe3\x83\xbb]?\xea\xb5\xac";
|
| -const char kStateRe[] =
|
| - "(?<!united )state|county|region|province"
|
| - // de-DE
|
| - "|land"
|
| - // en-UK
|
| - "|county|principality"
|
| - // ja-JP
|
| - "|\xe9\x83\xbd\xe9\x81\x93\xe5\xba\x9c\xe7\x9c\x8c"
|
| - // pt-BR, pt-PT
|
| - "|estado|provincia"
|
| - // ru
|
| - "|\xd0\xbe\xd0\xb1\xd0\xbb\xd0\xb0\xd1\x81\xd1\x82\xd1\x8c"
|
| - // zh-CN
|
| - "|\xe7\x9c\x81"
|
| - // zh-TW
|
| - "|\xe5\x9c\xb0\xe5\x8d\x80"
|
| - // ko-KR
|
| - "|^\xec\x8b\x9c[\xc2\xb7\xe3\x83\xbb]?\xeb\x8f\x84";
|
| -const char kAddressTypeSameAsRe[] = "same as";
|
| -const char kAddressTypeUseMyRe[] = "use my";
|
| -const char kBillingDesignatorRe[] = "bill";
|
| -const char kShippingDesignatorRe[] = "ship";
|
| -
|
| -} // namespace
|
| -
|
| FormField* AddressField::Parse(AutofillScanner* scanner) {
|
| if (scanner->IsEnd())
|
| return NULL;
|
| @@ -214,8 +25,8 @@ FormField* AddressField::Parse(AutofillScanner* scanner) {
|
| const AutofillField* const initial_field = scanner->Cursor();
|
| size_t saved_cursor = scanner->SaveCursor();
|
|
|
| - string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe);
|
| - string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe);
|
| + string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe);
|
| + string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe);
|
|
|
| // Allow address fields to appear in any order.
|
| size_t begin_trailing_non_labeled_fields = 0;
|
| @@ -376,7 +187,8 @@ bool AddressField::ParseCompany(AutofillScanner* scanner,
|
| if (address_field->company_ && !address_field->company_->IsEmpty())
|
| return false;
|
|
|
| - return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &address_field->company_);
|
| + return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe),
|
| + &address_field->company_);
|
| }
|
|
|
| // static
|
| @@ -393,8 +205,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
|
| if (address_field->address1_)
|
| return false;
|
|
|
| - string16 pattern = UTF8ToUTF16(kAddressLine1Re);
|
| - string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe);
|
| + string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re);
|
| + string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe);
|
|
|
| if (!ParseField(scanner, pattern, &address_field->address1_) &&
|
| !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
|
| @@ -405,8 +217,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
|
| // Optionally parse more address lines, which may have empty labels.
|
| // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
|
| // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
|
| - pattern = UTF8ToUTF16(kAddressLine2Re);
|
| - label_pattern = UTF8ToUTF16(kAddressLine2LabelRe);
|
| + pattern = UTF8ToUTF16(autofill::kAddressLine2Re);
|
| + label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe);
|
| if (!ParseEmptyLabel(scanner, &address_field->address2_) &&
|
| !ParseField(scanner, pattern, &address_field->address2_)) {
|
| ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
|
| @@ -415,7 +227,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
|
|
|
| // Try for a third line, which we will promptly discard.
|
| if (address_field->address2_ != NULL) {
|
| - pattern = UTF8ToUTF16(kAddressLine3Re);
|
| + pattern = UTF8ToUTF16(autofill::kAddressLine3Re);
|
| ParseField(scanner, pattern, NULL);
|
| }
|
|
|
| @@ -431,7 +243,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner,
|
| return false;
|
|
|
| return ParseFieldSpecifics(scanner,
|
| - UTF8ToUTF16(kCountryRe),
|
| + UTF8ToUTF16(autofill::kCountryRe),
|
| MATCH_DEFAULT | MATCH_SELECT,
|
| &address_field->country_);
|
| }
|
| @@ -449,7 +261,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner,
|
| if (address_field->zip_)
|
| return false;
|
|
|
| - string16 pattern = UTF8ToUTF16(kZipCodeRe);
|
| + string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe);
|
| if (!ParseField(scanner, pattern, &address_field->zip_))
|
| return false;
|
|
|
| @@ -457,7 +269,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner,
|
| // Look for a zip+4, whose field name will also often contain
|
| // the substring "zip".
|
| ParseField(scanner,
|
| - UTF8ToUTF16(kZip4Re),
|
| + UTF8ToUTF16(autofill::kZip4Re),
|
| &address_field->zip4_);
|
|
|
| return true;
|
| @@ -473,7 +285,7 @@ bool AddressField::ParseCity(AutofillScanner* scanner,
|
|
|
| // Select fields are allowed here. This occurs on top-100 site rediff.com.
|
| return ParseFieldSpecifics(scanner,
|
| - UTF8ToUTF16(kCityRe),
|
| + UTF8ToUTF16(autofill::kCityRe),
|
| MATCH_DEFAULT | MATCH_SELECT,
|
| &address_field->city_);
|
| }
|
| @@ -485,15 +297,16 @@ bool AddressField::ParseState(AutofillScanner* scanner,
|
| return false;
|
|
|
| return ParseFieldSpecifics(scanner,
|
| - UTF8ToUTF16(kStateRe),
|
| + UTF8ToUTF16(autofill::kStateRe),
|
| MATCH_DEFAULT | MATCH_SELECT,
|
| &address_field->state_);
|
| }
|
|
|
| AddressField::AddressType AddressField::AddressTypeFromText(
|
| const string16 &text) {
|
| - if (text.find(UTF8ToUTF16(kAddressTypeSameAsRe)) != string16::npos ||
|
| - text.find(UTF8ToUTF16(kAddressTypeUseMyRe)) != string16::npos)
|
| + size_t same_as_pos = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe));
|
| + size_t use_my_pos = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe));
|
| + if (same_as_pos != string16::npos || use_my_pos != string16::npos)
|
| // This text could be a checkbox label such as "same as my billing
|
| // address" or "use my shipping address".
|
| // ++ It would help if we generally skipped all text that appears
|
| @@ -503,8 +316,8 @@ AddressField::AddressType AddressField::AddressTypeFromText(
|
| // Not all pages say "billing address" and "shipping address" explicitly;
|
| // for example, Craft Catalog1.html has "Bill-to Address" and
|
| // "Ship-to Address".
|
| - size_t bill = text.rfind(UTF8ToUTF16(kBillingDesignatorRe));
|
| - size_t ship = text.rfind(UTF8ToUTF16(kShippingDesignatorRe));
|
| + size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe));
|
| + size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe));
|
|
|
| if (bill == string16::npos && ship == string16::npos)
|
| return kGenericAddress;
|
|
|