Chromium Code Reviews | Index: chrome/browser/autofill/address_field.cc |
| diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc |
| index d3acc560ddb53484e907f272b176f2f6fd5ac131..7370fa097fa4f10973ef6ded4a649be19d63e7a6 100644 |
| --- a/chrome/browser/autofill/address_field.cc |
| +++ b/chrome/browser/autofill/address_field.cc |
| @@ -12,200 +12,11 @@ |
| #include "base/string_util.h" |
| #include "base/utf_string_conversions.h" |
| #include "chrome/browser/autofill/autofill_field.h" |
| +#include "chrome/browser/autofill/autofill_regex_constants.h" |
| #include "chrome/browser/autofill/autofill_scanner.h" |
| #include "chrome/browser/autofill/field_types.h" |
| #include "ui/base/l10n/l10n_util.h" |
| -namespace { |
| - |
| -// The UTF-8 version of these regular expressions are in |
| -// regular_expressions.txt. |
| -const char kAttentionIgnoredRe[] = "attention|attn"; |
| -const char kRegionIgnoredRe[] = |
| - "province|region|other" |
| - // es |
| - "|provincia" |
| - // pt-BR, pt-PT |
| - "|bairro|suburb"; |
| -const char kCompanyRe[] = |
| - "company|business|organization|organisation" |
| - // de-DE |
| - "|firma|firmenname" |
| - // es |
| - "|empresa" |
| - // fr-FR |
| - "|societe|soci\xc3\xa9t\xc3\xa9" |
| - // it-IT |
| - "|ragione.?sociale" |
| - // ja-JP |
| - "|\xe4\xbc\x9a\xe7\xa4\xbe" |
| - // ru |
| - "|\xd0\xbd\xd0\xb0\xd0\xb7\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5.?\xd0" |
| - "\xba\xd0\xbe\xd0\xbc\xd0\xbf\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb8" |
| - // zh-CN |
| - "|\xe5\x8d\x95\xe4\xbd\x8d|\xe5\x85\xac\xe5\x8f\xb8" |
| - // ko-KR |
| - "|\xed\x9a\x8c\xec\x82\xac|\xec\xa7\x81\xec\x9e\xa5"; |
| -const char kAddressLine1Re[] = |
| - "address.*line|address1|addr1|street" |
| - // de-DE |
| - "|strasse|stra\xc3\x9f""e|hausnummer|housenumber" |
| - // en-GB |
| - "|house.?name" |
| - // es |
| - "|direccion|direcci\xc3\xb3n" |
| - // fr-FR |
| - "|adresse" |
| - // it-IT |
| - "|indirizzo" |
| - // ja-JP |
| - "|\xe4\xbd\x8f\xe6\x89\x80""1" |
| - // pt-BR, pt-PT |
| - "|morada|endere\xc3\xa7o" |
| - // ru |
| - "|\xd0\x90\xd0\xb4\xd1\x80\xd0\xb5\xd1\x81" |
| - // zh-CN |
| - "|\xe5\x9c\xb0\xe5\x9d\x80" |
| - // ko-KR |
| - "|\xec\xa3\xbc\xec\x86\x8c.?1"; |
| -const char kAddressLine1LabelRe[] = |
| - "address" |
| - // fr-FR |
| - "|adresse" |
| - // it-IT |
| - "|indirizzo" |
| - // ja-JP |
| - "|\xe4\xbd\x8f\xe6\x89\x80" |
| - // zh-CN |
| - "|\xe5\x9c\xb0\xe5\x9d\x80" |
| - // ko-KR |
| - "|\xec\xa3\xbc\xec\x86\x8c"; |
| -const char kAddressLine2Re[] = |
| - "address.*line2|address2|addr2|street|suite|unit" |
| - // de-DE |
| - "|adresszusatz|erg\xc3\xa4nzende.?angaben" |
| - // es |
| - "|direccion2|colonia|adicional" |
| - // fr-FR |
| - "|addresssuppl|complementnom|appartement" |
| - // it-IT |
| - "|indirizzo2" |
| - // ja-JP |
| - "|\xe4\xbd\x8f\xe6\x89\x80""2" |
| - // pt-BR, pt-PT |
| - "|complemento|addrcomplement" |
| - // ru |
| - "|\xd0\xa3\xd0\xbb\xd0\xb8\xd1\x86\xd0\xb0" |
| - // zh-CN |
| - "|\xe5\x9c\xb0\xe5\x9d\x80""2" |
| - // ko-KR |
| - "|\xec\xa3\xbc\xec\x86\x8c.?2"; |
| -const char kAddressLine2LabelRe[] = |
| - "address" |
| - // fr-FR |
| - "|adresse" |
| - // it-IT |
| - "|indirizzo" |
| - // zh-CN |
| - "|\xe5\x9c\xb0\xe5\x9d\x80" |
| - // ko-KR |
| - "|\xec\xa3\xbc\xec\x86\x8c"; |
| -const char kAddressLine3Re[] = |
| - "address.*line3|address3|addr3|street|line3" |
| - // es |
| - "|municipio" |
| - // fr-FR |
| - "|batiment|residence" |
| - // it-IT |
| - "|indirizzo3"; |
| -const char kCountryRe[] = |
| - "country|countries|location" |
| - // es |
| - "|pa\xc3\xads|pais" |
| - // ja-JP |
| - "|\xe5\x9b\xbd" |
| - // zh-CN |
| - "|\xe5\x9b\xbd\xe5\xae\xb6" |
| - // ko-KR |
| - "|\xea\xb5\xad\xea\xb0\x80|\xeb\x82\x98\xeb\x9d\xbc"; |
| -const char kZipCodeRe[] = |
| - "zip|postal|post.*code|pcode|^1z$" |
| - // de-DE |
| - "|postleitzahl" |
| - // es |
| - "|\\bcp\\b" |
| - // fr-FR |
| - "|\\bcdp\\b" |
| - // it-IT |
| - "|\\bcap\\b" |
| - // ja-JP |
| - "|\xe9\x83\xb5\xe4\xbe\xbf\xe7\x95\xaa\xe5\x8f\xb7" |
| - // pt-BR, pt-PT |
| - "|codigo|codpos|\\bcep\\b" |
| - // ru |
| - "|\xd0\x9f\xd0\xbe\xd1\x87\xd1\x82\xd0\xbe\xd0\xb2\xd1\x8b\xd0\xb9.?\xd0" |
| - "\x98\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81" |
| - // zh-CN |
| - "|\xe9\x82\xae\xe6\x94\xbf\xe7\xbc\x96\xe7\xa0\x81|\xe9\x82\xae\xe7\xbc" |
| - "\x96" |
| - // zh-TW |
| - "|\xe9\x83\xb5\xe9\x81\x9e\xe5\x8d\x80\xe8\x99\x9f" |
| - // ko-KR |
| - "|\xec\x9a\xb0\xed\x8e\xb8.?\xeb\xb2\x88\xed\x98\xb8"; |
| -const char kZip4Re[] = |
| - "zip|^-$|post2" |
| - // pt-BR, pt-PT |
| - "|codpos2"; |
| -const char kCityRe[] = |
| - "city|town" |
| - // de-DE |
| - "|\\bort\\b|stadt" |
| - // en-AU |
| - "|suburb" |
| - // es |
| - "|ciudad|provincia|localidad|poblacion" |
| - // fr-FR |
| - "|ville|commune" |
| - // it-IT |
| - "|localita" |
| - // ja-JP |
| - "|\xe5\xb8\x82\xe5\x8c\xba\xe7\x94\xba\xe6\x9d\x91" |
| - // pt-BR, pt-PT |
| - "|cidade" |
| - // ru |
| - "|\xd0\x93\xd0\xbe\xd1\x80\xd0\xbe\xd0\xb4" |
| - // zh-CN |
| - "|\xe5\xb8\x82" |
| - // zh-TW |
| - "|\xe5\x88\x86\xe5\x8d\x80" |
| - // ko-KR |
| - "|^\xec\x8b\x9c[^\xeb\x8f\x84\xc2\xb7\xe3\x83\xbb]|\xec\x8b\x9c[\xc2\xb7" |
| - "\xe3\x83\xbb]?\xea\xb5\xb0[\xc2\xb7\xe3\x83\xbb]?\xea\xb5\xac"; |
| -const char kStateRe[] = |
| - "(?<!united )state|county|region|province" |
| - // de-DE |
| - "|land" |
| - // en-UK |
| - "|county|principality" |
| - // ja-JP |
| - "|\xe9\x83\xbd\xe9\x81\x93\xe5\xba\x9c\xe7\x9c\x8c" |
| - // pt-BR, pt-PT |
| - "|estado|provincia" |
| - // ru |
| - "|\xd0\xbe\xd0\xb1\xd0\xbb\xd0\xb0\xd1\x81\xd1\x82\xd1\x8c" |
| - // zh-CN |
| - "|\xe7\x9c\x81" |
| - // zh-TW |
| - "|\xe5\x9c\xb0\xe5\x8d\x80" |
| - // ko-KR |
| - "|^\xec\x8b\x9c[\xc2\xb7\xe3\x83\xbb]?\xeb\x8f\x84"; |
| -const char kAddressTypeSameAsRe[] = "same as"; |
| -const char kAddressTypeUseMyRe[] = "use my"; |
| -const char kBillingDesignatorRe[] = "bill"; |
| -const char kShippingDesignatorRe[] = "ship"; |
| - |
| -} // namespace |
| - |
| FormField* AddressField::Parse(AutofillScanner* scanner) { |
| if (scanner->IsEnd()) |
| return NULL; |
| @@ -214,8 +25,8 @@ FormField* AddressField::Parse(AutofillScanner* scanner) { |
| const AutofillField* const initial_field = scanner->Cursor(); |
| size_t saved_cursor = scanner->SaveCursor(); |
| - string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
| - string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
| + string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); |
|
Ilya Sherman
2011/09/13 22:06:05
nit: Since we're generating these strings with a s
|
| + string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); |
| // Allow address fields to appear in any order. |
| size_t begin_trailing_non_labeled_fields = 0; |
| @@ -376,7 +187,8 @@ bool AddressField::ParseCompany(AutofillScanner* scanner, |
| if (address_field->company_ && !address_field->company_->IsEmpty()) |
| return false; |
| - return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &address_field->company_); |
| + return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), |
| + &address_field->company_); |
| } |
| // static |
| @@ -393,8 +205,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
| if (address_field->address1_) |
| return false; |
| - string16 pattern = UTF8ToUTF16(kAddressLine1Re); |
| - string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe); |
| + string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); |
| + string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); |
| if (!ParseField(scanner, pattern, &address_field->address1_) && |
| !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| @@ -405,8 +217,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
| // Optionally parse more address lines, which may have empty labels. |
| // Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
| // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
| - pattern = UTF8ToUTF16(kAddressLine2Re); |
| - label_pattern = UTF8ToUTF16(kAddressLine2LabelRe); |
| + pattern = UTF8ToUTF16(autofill::kAddressLine2Re); |
| + label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); |
| if (!ParseEmptyLabel(scanner, &address_field->address2_) && |
| !ParseField(scanner, pattern, &address_field->address2_)) { |
| ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| @@ -415,7 +227,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
| // Try for a third line, which we will promptly discard. |
| if (address_field->address2_ != NULL) { |
| - pattern = UTF8ToUTF16(kAddressLine3Re); |
| + pattern = UTF8ToUTF16(autofill::kAddressLine3Re); |
| ParseField(scanner, pattern, NULL); |
| } |
| @@ -431,7 +243,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner, |
| return false; |
| return ParseFieldSpecifics(scanner, |
| - UTF8ToUTF16(kCountryRe), |
| + UTF8ToUTF16(autofill::kCountryRe), |
| MATCH_DEFAULT | MATCH_SELECT, |
| &address_field->country_); |
| } |
| @@ -449,7 +261,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner, |
| if (address_field->zip_) |
| return false; |
| - string16 pattern = UTF8ToUTF16(kZipCodeRe); |
| + string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); |
| if (!ParseField(scanner, pattern, &address_field->zip_)) |
| return false; |
| @@ -457,7 +269,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner, |
| // Look for a zip+4, whose field name will also often contain |
| // the substring "zip". |
| ParseField(scanner, |
| - UTF8ToUTF16(kZip4Re), |
| + UTF8ToUTF16(autofill::kZip4Re), |
| &address_field->zip4_); |
| return true; |
| @@ -473,7 +285,7 @@ bool AddressField::ParseCity(AutofillScanner* scanner, |
| // Select fields are allowed here. This occurs on top-100 site rediff.com. |
| return ParseFieldSpecifics(scanner, |
| - UTF8ToUTF16(kCityRe), |
| + UTF8ToUTF16(autofill::kCityRe), |
| MATCH_DEFAULT | MATCH_SELECT, |
| &address_field->city_); |
| } |
| @@ -485,15 +297,16 @@ bool AddressField::ParseState(AutofillScanner* scanner, |
| return false; |
| return ParseFieldSpecifics(scanner, |
| - UTF8ToUTF16(kStateRe), |
| + UTF8ToUTF16(autofill::kStateRe), |
| MATCH_DEFAULT | MATCH_SELECT, |
| &address_field->state_); |
| } |
| AddressField::AddressType AddressField::AddressTypeFromText( |
| const string16 &text) { |
| - if (text.find(UTF8ToUTF16(kAddressTypeSameAsRe)) != string16::npos || |
| - text.find(UTF8ToUTF16(kAddressTypeUseMyRe)) != string16::npos) |
| + size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)); |
| + size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)); |
| + if (same_as != string16::npos || use_shipping != string16::npos) |
| // This text could be a checkbox label such as "same as my billing |
| // address" or "use my shipping address". |
| // ++ It would help if we generally skipped all text that appears |
| @@ -503,8 +316,8 @@ AddressField::AddressType AddressField::AddressTypeFromText( |
| // Not all pages say "billing address" and "shipping address" explicitly; |
| // for example, Craft Catalog1.html has "Bill-to Address" and |
| // "Ship-to Address". |
| - size_t bill = text.rfind(UTF8ToUTF16(kBillingDesignatorRe)); |
| - size_t ship = text.rfind(UTF8ToUTF16(kShippingDesignatorRe)); |
| + size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe)); |
| + size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe)); |
| if (bill == string16::npos && ship == string16::npos) |
| return kGenericAddress; |