Index: chrome/browser/autofill/address_field.cc |
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc |
index d3acc560ddb53484e907f272b176f2f6fd5ac131..7370fa097fa4f10973ef6ded4a649be19d63e7a6 100644 |
--- a/chrome/browser/autofill/address_field.cc |
+++ b/chrome/browser/autofill/address_field.cc |
@@ -12,200 +12,11 @@ |
#include "base/string_util.h" |
#include "base/utf_string_conversions.h" |
#include "chrome/browser/autofill/autofill_field.h" |
+#include "chrome/browser/autofill/autofill_regex_constants.h" |
#include "chrome/browser/autofill/autofill_scanner.h" |
#include "chrome/browser/autofill/field_types.h" |
#include "ui/base/l10n/l10n_util.h" |
-namespace { |
- |
-// The UTF-8 version of these regular expressions are in |
-// regular_expressions.txt. |
-const char kAttentionIgnoredRe[] = "attention|attn"; |
-const char kRegionIgnoredRe[] = |
- "province|region|other" |
- // es |
- "|provincia" |
- // pt-BR, pt-PT |
- "|bairro|suburb"; |
-const char kCompanyRe[] = |
- "company|business|organization|organisation" |
- // de-DE |
- "|firma|firmenname" |
- // es |
- "|empresa" |
- // fr-FR |
- "|societe|soci\xc3\xa9t\xc3\xa9" |
- // it-IT |
- "|ragione.?sociale" |
- // ja-JP |
- "|\xe4\xbc\x9a\xe7\xa4\xbe" |
- // ru |
- "|\xd0\xbd\xd0\xb0\xd0\xb7\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5.?\xd0" |
- "\xba\xd0\xbe\xd0\xbc\xd0\xbf\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb8" |
- // zh-CN |
- "|\xe5\x8d\x95\xe4\xbd\x8d|\xe5\x85\xac\xe5\x8f\xb8" |
- // ko-KR |
- "|\xed\x9a\x8c\xec\x82\xac|\xec\xa7\x81\xec\x9e\xa5"; |
-const char kAddressLine1Re[] = |
- "address.*line|address1|addr1|street" |
- // de-DE |
- "|strasse|stra\xc3\x9f""e|hausnummer|housenumber" |
- // en-GB |
- "|house.?name" |
- // es |
- "|direccion|direcci\xc3\xb3n" |
- // fr-FR |
- "|adresse" |
- // it-IT |
- "|indirizzo" |
- // ja-JP |
- "|\xe4\xbd\x8f\xe6\x89\x80""1" |
- // pt-BR, pt-PT |
- "|morada|endere\xc3\xa7o" |
- // ru |
- "|\xd0\x90\xd0\xb4\xd1\x80\xd0\xb5\xd1\x81" |
- // zh-CN |
- "|\xe5\x9c\xb0\xe5\x9d\x80" |
- // ko-KR |
- "|\xec\xa3\xbc\xec\x86\x8c.?1"; |
-const char kAddressLine1LabelRe[] = |
- "address" |
- // fr-FR |
- "|adresse" |
- // it-IT |
- "|indirizzo" |
- // ja-JP |
- "|\xe4\xbd\x8f\xe6\x89\x80" |
- // zh-CN |
- "|\xe5\x9c\xb0\xe5\x9d\x80" |
- // ko-KR |
- "|\xec\xa3\xbc\xec\x86\x8c"; |
-const char kAddressLine2Re[] = |
- "address.*line2|address2|addr2|street|suite|unit" |
- // de-DE |
- "|adresszusatz|erg\xc3\xa4nzende.?angaben" |
- // es |
- "|direccion2|colonia|adicional" |
- // fr-FR |
- "|addresssuppl|complementnom|appartement" |
- // it-IT |
- "|indirizzo2" |
- // ja-JP |
- "|\xe4\xbd\x8f\xe6\x89\x80""2" |
- // pt-BR, pt-PT |
- "|complemento|addrcomplement" |
- // ru |
- "|\xd0\xa3\xd0\xbb\xd0\xb8\xd1\x86\xd0\xb0" |
- // zh-CN |
- "|\xe5\x9c\xb0\xe5\x9d\x80""2" |
- // ko-KR |
- "|\xec\xa3\xbc\xec\x86\x8c.?2"; |
-const char kAddressLine2LabelRe[] = |
- "address" |
- // fr-FR |
- "|adresse" |
- // it-IT |
- "|indirizzo" |
- // zh-CN |
- "|\xe5\x9c\xb0\xe5\x9d\x80" |
- // ko-KR |
- "|\xec\xa3\xbc\xec\x86\x8c"; |
-const char kAddressLine3Re[] = |
- "address.*line3|address3|addr3|street|line3" |
- // es |
- "|municipio" |
- // fr-FR |
- "|batiment|residence" |
- // it-IT |
- "|indirizzo3"; |
-const char kCountryRe[] = |
- "country|countries|location" |
- // es |
- "|pa\xc3\xads|pais" |
- // ja-JP |
- "|\xe5\x9b\xbd" |
- // zh-CN |
- "|\xe5\x9b\xbd\xe5\xae\xb6" |
- // ko-KR |
- "|\xea\xb5\xad\xea\xb0\x80|\xeb\x82\x98\xeb\x9d\xbc"; |
-const char kZipCodeRe[] = |
- "zip|postal|post.*code|pcode|^1z$" |
- // de-DE |
- "|postleitzahl" |
- // es |
- "|\\bcp\\b" |
- // fr-FR |
- "|\\bcdp\\b" |
- // it-IT |
- "|\\bcap\\b" |
- // ja-JP |
- "|\xe9\x83\xb5\xe4\xbe\xbf\xe7\x95\xaa\xe5\x8f\xb7" |
- // pt-BR, pt-PT |
- "|codigo|codpos|\\bcep\\b" |
- // ru |
- "|\xd0\x9f\xd0\xbe\xd1\x87\xd1\x82\xd0\xbe\xd0\xb2\xd1\x8b\xd0\xb9.?\xd0" |
- "\x98\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81" |
- // zh-CN |
- "|\xe9\x82\xae\xe6\x94\xbf\xe7\xbc\x96\xe7\xa0\x81|\xe9\x82\xae\xe7\xbc" |
- "\x96" |
- // zh-TW |
- "|\xe9\x83\xb5\xe9\x81\x9e\xe5\x8d\x80\xe8\x99\x9f" |
- // ko-KR |
- "|\xec\x9a\xb0\xed\x8e\xb8.?\xeb\xb2\x88\xed\x98\xb8"; |
-const char kZip4Re[] = |
- "zip|^-$|post2" |
- // pt-BR, pt-PT |
- "|codpos2"; |
-const char kCityRe[] = |
- "city|town" |
- // de-DE |
- "|\\bort\\b|stadt" |
- // en-AU |
- "|suburb" |
- // es |
- "|ciudad|provincia|localidad|poblacion" |
- // fr-FR |
- "|ville|commune" |
- // it-IT |
- "|localita" |
- // ja-JP |
- "|\xe5\xb8\x82\xe5\x8c\xba\xe7\x94\xba\xe6\x9d\x91" |
- // pt-BR, pt-PT |
- "|cidade" |
- // ru |
- "|\xd0\x93\xd0\xbe\xd1\x80\xd0\xbe\xd0\xb4" |
- // zh-CN |
- "|\xe5\xb8\x82" |
- // zh-TW |
- "|\xe5\x88\x86\xe5\x8d\x80" |
- // ko-KR |
- "|^\xec\x8b\x9c[^\xeb\x8f\x84\xc2\xb7\xe3\x83\xbb]|\xec\x8b\x9c[\xc2\xb7" |
- "\xe3\x83\xbb]?\xea\xb5\xb0[\xc2\xb7\xe3\x83\xbb]?\xea\xb5\xac"; |
-const char kStateRe[] = |
- "(?<!united )state|county|region|province" |
- // de-DE |
- "|land" |
- // en-UK |
- "|county|principality" |
- // ja-JP |
- "|\xe9\x83\xbd\xe9\x81\x93\xe5\xba\x9c\xe7\x9c\x8c" |
- // pt-BR, pt-PT |
- "|estado|provincia" |
- // ru |
- "|\xd0\xbe\xd0\xb1\xd0\xbb\xd0\xb0\xd1\x81\xd1\x82\xd1\x8c" |
- // zh-CN |
- "|\xe7\x9c\x81" |
- // zh-TW |
- "|\xe5\x9c\xb0\xe5\x8d\x80" |
- // ko-KR |
- "|^\xec\x8b\x9c[\xc2\xb7\xe3\x83\xbb]?\xeb\x8f\x84"; |
-const char kAddressTypeSameAsRe[] = "same as"; |
-const char kAddressTypeUseMyRe[] = "use my"; |
-const char kBillingDesignatorRe[] = "bill"; |
-const char kShippingDesignatorRe[] = "ship"; |
- |
-} // namespace |
- |
FormField* AddressField::Parse(AutofillScanner* scanner) { |
if (scanner->IsEnd()) |
return NULL; |
@@ -214,8 +25,8 @@ FormField* AddressField::Parse(AutofillScanner* scanner) { |
const AutofillField* const initial_field = scanner->Cursor(); |
size_t saved_cursor = scanner->SaveCursor(); |
- string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
- string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
+ string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); |
+ string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); |
// Allow address fields to appear in any order. |
size_t begin_trailing_non_labeled_fields = 0; |
@@ -376,7 +187,8 @@ bool AddressField::ParseCompany(AutofillScanner* scanner, |
if (address_field->company_ && !address_field->company_->IsEmpty()) |
return false; |
- return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &address_field->company_); |
+ return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), |
+ &address_field->company_); |
} |
// static |
@@ -393,8 +205,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
if (address_field->address1_) |
return false; |
- string16 pattern = UTF8ToUTF16(kAddressLine1Re); |
- string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe); |
+ string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); |
+ string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); |
if (!ParseField(scanner, pattern, &address_field->address1_) && |
!ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
@@ -405,8 +217,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
// Optionally parse more address lines, which may have empty labels. |
// Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
// Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
- pattern = UTF8ToUTF16(kAddressLine2Re); |
- label_pattern = UTF8ToUTF16(kAddressLine2LabelRe); |
+ pattern = UTF8ToUTF16(autofill::kAddressLine2Re); |
+ label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); |
if (!ParseEmptyLabel(scanner, &address_field->address2_) && |
!ParseField(scanner, pattern, &address_field->address2_)) { |
ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
@@ -415,7 +227,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
// Try for a third line, which we will promptly discard. |
if (address_field->address2_ != NULL) { |
- pattern = UTF8ToUTF16(kAddressLine3Re); |
+ pattern = UTF8ToUTF16(autofill::kAddressLine3Re); |
ParseField(scanner, pattern, NULL); |
} |
@@ -431,7 +243,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner, |
return false; |
return ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kCountryRe), |
+ UTF8ToUTF16(autofill::kCountryRe), |
MATCH_DEFAULT | MATCH_SELECT, |
&address_field->country_); |
} |
@@ -449,7 +261,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner, |
if (address_field->zip_) |
return false; |
- string16 pattern = UTF8ToUTF16(kZipCodeRe); |
+ string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); |
if (!ParseField(scanner, pattern, &address_field->zip_)) |
return false; |
@@ -457,7 +269,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner, |
// Look for a zip+4, whose field name will also often contain |
// the substring "zip". |
ParseField(scanner, |
- UTF8ToUTF16(kZip4Re), |
+ UTF8ToUTF16(autofill::kZip4Re), |
&address_field->zip4_); |
return true; |
@@ -473,7 +285,7 @@ bool AddressField::ParseCity(AutofillScanner* scanner, |
// Select fields are allowed here. This occurs on top-100 site rediff.com. |
return ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kCityRe), |
+ UTF8ToUTF16(autofill::kCityRe), |
MATCH_DEFAULT | MATCH_SELECT, |
&address_field->city_); |
} |
@@ -485,15 +297,16 @@ bool AddressField::ParseState(AutofillScanner* scanner, |
return false; |
return ParseFieldSpecifics(scanner, |
- UTF8ToUTF16(kStateRe), |
+ UTF8ToUTF16(autofill::kStateRe), |
MATCH_DEFAULT | MATCH_SELECT, |
&address_field->state_); |
} |
AddressField::AddressType AddressField::AddressTypeFromText( |
const string16 &text) { |
- if (text.find(UTF8ToUTF16(kAddressTypeSameAsRe)) != string16::npos || |
- text.find(UTF8ToUTF16(kAddressTypeUseMyRe)) != string16::npos) |
+ size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)); |
+ size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)); |
+ if (same_as != string16::npos || use_shipping != string16::npos) |
// This text could be a checkbox label such as "same as my billing |
// address" or "use my shipping address". |
// ++ It would help if we generally skipped all text that appears |
@@ -503,8 +316,8 @@ AddressField::AddressType AddressField::AddressTypeFromText( |
// Not all pages say "billing address" and "shipping address" explicitly; |
// for example, Craft Catalog1.html has "Bill-to Address" and |
// "Ship-to Address". |
- size_t bill = text.rfind(UTF8ToUTF16(kBillingDesignatorRe)); |
- size_t ship = text.rfind(UTF8ToUTF16(kShippingDesignatorRe)); |
+ size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe)); |
+ size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe)); |
if (bill == string16::npos && ship == string16::npos) |
return kGenericAddress; |