Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(684)

Unified Diff: chrome/browser/autofill/address_field.cc

Issue 7891020: Make autofill regular expressions unicode again. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: remove extra gyp change Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « build/escape_unicode.py ('k') | chrome/browser/autofill/autofill_regex_constants.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/autofill/address_field.cc
diff --git a/chrome/browser/autofill/address_field.cc b/chrome/browser/autofill/address_field.cc
index d3acc560ddb53484e907f272b176f2f6fd5ac131..7370fa097fa4f10973ef6ded4a649be19d63e7a6 100644
--- a/chrome/browser/autofill/address_field.cc
+++ b/chrome/browser/autofill/address_field.cc
@@ -12,200 +12,11 @@
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/autofill/autofill_field.h"
+#include "chrome/browser/autofill/autofill_regex_constants.h"
#include "chrome/browser/autofill/autofill_scanner.h"
#include "chrome/browser/autofill/field_types.h"
#include "ui/base/l10n/l10n_util.h"
-namespace {
-
-// The UTF-8 version of these regular expressions are in
-// regular_expressions.txt.
-const char kAttentionIgnoredRe[] = "attention|attn";
-const char kRegionIgnoredRe[] =
- "province|region|other"
- // es
- "|provincia"
- // pt-BR, pt-PT
- "|bairro|suburb";
-const char kCompanyRe[] =
- "company|business|organization|organisation"
- // de-DE
- "|firma|firmenname"
- // es
- "|empresa"
- // fr-FR
- "|societe|soci\xc3\xa9t\xc3\xa9"
- // it-IT
- "|ragione.?sociale"
- // ja-JP
- "|\xe4\xbc\x9a\xe7\xa4\xbe"
- // ru
- "|\xd0\xbd\xd0\xb0\xd0\xb7\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5.?\xd0"
- "\xba\xd0\xbe\xd0\xbc\xd0\xbf\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb8"
- // zh-CN
- "|\xe5\x8d\x95\xe4\xbd\x8d|\xe5\x85\xac\xe5\x8f\xb8"
- // ko-KR
- "|\xed\x9a\x8c\xec\x82\xac|\xec\xa7\x81\xec\x9e\xa5";
-const char kAddressLine1Re[] =
- "address.*line|address1|addr1|street"
- // de-DE
- "|strasse|stra\xc3\x9f""e|hausnummer|housenumber"
- // en-GB
- "|house.?name"
- // es
- "|direccion|direcci\xc3\xb3n"
- // fr-FR
- "|adresse"
- // it-IT
- "|indirizzo"
- // ja-JP
- "|\xe4\xbd\x8f\xe6\x89\x80""1"
- // pt-BR, pt-PT
- "|morada|endere\xc3\xa7o"
- // ru
- "|\xd0\x90\xd0\xb4\xd1\x80\xd0\xb5\xd1\x81"
- // zh-CN
- "|\xe5\x9c\xb0\xe5\x9d\x80"
- // ko-KR
- "|\xec\xa3\xbc\xec\x86\x8c.?1";
-const char kAddressLine1LabelRe[] =
- "address"
- // fr-FR
- "|adresse"
- // it-IT
- "|indirizzo"
- // ja-JP
- "|\xe4\xbd\x8f\xe6\x89\x80"
- // zh-CN
- "|\xe5\x9c\xb0\xe5\x9d\x80"
- // ko-KR
- "|\xec\xa3\xbc\xec\x86\x8c";
-const char kAddressLine2Re[] =
- "address.*line2|address2|addr2|street|suite|unit"
- // de-DE
- "|adresszusatz|erg\xc3\xa4nzende.?angaben"
- // es
- "|direccion2|colonia|adicional"
- // fr-FR
- "|addresssuppl|complementnom|appartement"
- // it-IT
- "|indirizzo2"
- // ja-JP
- "|\xe4\xbd\x8f\xe6\x89\x80""2"
- // pt-BR, pt-PT
- "|complemento|addrcomplement"
- // ru
- "|\xd0\xa3\xd0\xbb\xd0\xb8\xd1\x86\xd0\xb0"
- // zh-CN
- "|\xe5\x9c\xb0\xe5\x9d\x80""2"
- // ko-KR
- "|\xec\xa3\xbc\xec\x86\x8c.?2";
-const char kAddressLine2LabelRe[] =
- "address"
- // fr-FR
- "|adresse"
- // it-IT
- "|indirizzo"
- // zh-CN
- "|\xe5\x9c\xb0\xe5\x9d\x80"
- // ko-KR
- "|\xec\xa3\xbc\xec\x86\x8c";
-const char kAddressLine3Re[] =
- "address.*line3|address3|addr3|street|line3"
- // es
- "|municipio"
- // fr-FR
- "|batiment|residence"
- // it-IT
- "|indirizzo3";
-const char kCountryRe[] =
- "country|countries|location"
- // es
- "|pa\xc3\xads|pais"
- // ja-JP
- "|\xe5\x9b\xbd"
- // zh-CN
- "|\xe5\x9b\xbd\xe5\xae\xb6"
- // ko-KR
- "|\xea\xb5\xad\xea\xb0\x80|\xeb\x82\x98\xeb\x9d\xbc";
-const char kZipCodeRe[] =
- "zip|postal|post.*code|pcode|^1z$"
- // de-DE
- "|postleitzahl"
- // es
- "|\\bcp\\b"
- // fr-FR
- "|\\bcdp\\b"
- // it-IT
- "|\\bcap\\b"
- // ja-JP
- "|\xe9\x83\xb5\xe4\xbe\xbf\xe7\x95\xaa\xe5\x8f\xb7"
- // pt-BR, pt-PT
- "|codigo|codpos|\\bcep\\b"
- // ru
- "|\xd0\x9f\xd0\xbe\xd1\x87\xd1\x82\xd0\xbe\xd0\xb2\xd1\x8b\xd0\xb9.?\xd0"
- "\x98\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81"
- // zh-CN
- "|\xe9\x82\xae\xe6\x94\xbf\xe7\xbc\x96\xe7\xa0\x81|\xe9\x82\xae\xe7\xbc"
- "\x96"
- // zh-TW
- "|\xe9\x83\xb5\xe9\x81\x9e\xe5\x8d\x80\xe8\x99\x9f"
- // ko-KR
- "|\xec\x9a\xb0\xed\x8e\xb8.?\xeb\xb2\x88\xed\x98\xb8";
-const char kZip4Re[] =
- "zip|^-$|post2"
- // pt-BR, pt-PT
- "|codpos2";
-const char kCityRe[] =
- "city|town"
- // de-DE
- "|\\bort\\b|stadt"
- // en-AU
- "|suburb"
- // es
- "|ciudad|provincia|localidad|poblacion"
- // fr-FR
- "|ville|commune"
- // it-IT
- "|localita"
- // ja-JP
- "|\xe5\xb8\x82\xe5\x8c\xba\xe7\x94\xba\xe6\x9d\x91"
- // pt-BR, pt-PT
- "|cidade"
- // ru
- "|\xd0\x93\xd0\xbe\xd1\x80\xd0\xbe\xd0\xb4"
- // zh-CN
- "|\xe5\xb8\x82"
- // zh-TW
- "|\xe5\x88\x86\xe5\x8d\x80"
- // ko-KR
- "|^\xec\x8b\x9c[^\xeb\x8f\x84\xc2\xb7\xe3\x83\xbb]|\xec\x8b\x9c[\xc2\xb7"
- "\xe3\x83\xbb]?\xea\xb5\xb0[\xc2\xb7\xe3\x83\xbb]?\xea\xb5\xac";
-const char kStateRe[] =
- "(?<!united )state|county|region|province"
- // de-DE
- "|land"
- // en-UK
- "|county|principality"
- // ja-JP
- "|\xe9\x83\xbd\xe9\x81\x93\xe5\xba\x9c\xe7\x9c\x8c"
- // pt-BR, pt-PT
- "|estado|provincia"
- // ru
- "|\xd0\xbe\xd0\xb1\xd0\xbb\xd0\xb0\xd1\x81\xd1\x82\xd1\x8c"
- // zh-CN
- "|\xe7\x9c\x81"
- // zh-TW
- "|\xe5\x9c\xb0\xe5\x8d\x80"
- // ko-KR
- "|^\xec\x8b\x9c[\xc2\xb7\xe3\x83\xbb]?\xeb\x8f\x84";
-const char kAddressTypeSameAsRe[] = "same as";
-const char kAddressTypeUseMyRe[] = "use my";
-const char kBillingDesignatorRe[] = "bill";
-const char kShippingDesignatorRe[] = "ship";
-
-} // namespace
-
FormField* AddressField::Parse(AutofillScanner* scanner) {
if (scanner->IsEnd())
return NULL;
@@ -214,8 +25,8 @@ FormField* AddressField::Parse(AutofillScanner* scanner) {
const AutofillField* const initial_field = scanner->Cursor();
size_t saved_cursor = scanner->SaveCursor();
- string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe);
- string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe);
+ string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe);
+ string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe);
// Allow address fields to appear in any order.
size_t begin_trailing_non_labeled_fields = 0;
@@ -376,7 +187,8 @@ bool AddressField::ParseCompany(AutofillScanner* scanner,
if (address_field->company_ && !address_field->company_->IsEmpty())
return false;
- return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &address_field->company_);
+ return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe),
+ &address_field->company_);
}
// static
@@ -393,8 +205,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
if (address_field->address1_)
return false;
- string16 pattern = UTF8ToUTF16(kAddressLine1Re);
- string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe);
+ string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re);
+ string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe);
if (!ParseField(scanner, pattern, &address_field->address1_) &&
!ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
@@ -405,8 +217,8 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
// Optionally parse more address lines, which may have empty labels.
// Some pages have 3 address lines (eg SharperImageModifyAccount.html)
// Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
- pattern = UTF8ToUTF16(kAddressLine2Re);
- label_pattern = UTF8ToUTF16(kAddressLine2LabelRe);
+ pattern = UTF8ToUTF16(autofill::kAddressLine2Re);
+ label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe);
if (!ParseEmptyLabel(scanner, &address_field->address2_) &&
!ParseField(scanner, pattern, &address_field->address2_)) {
ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
@@ -415,7 +227,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
// Try for a third line, which we will promptly discard.
if (address_field->address2_ != NULL) {
- pattern = UTF8ToUTF16(kAddressLine3Re);
+ pattern = UTF8ToUTF16(autofill::kAddressLine3Re);
ParseField(scanner, pattern, NULL);
}
@@ -431,7 +243,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner,
return false;
return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCountryRe),
+ UTF8ToUTF16(autofill::kCountryRe),
MATCH_DEFAULT | MATCH_SELECT,
&address_field->country_);
}
@@ -449,7 +261,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner,
if (address_field->zip_)
return false;
- string16 pattern = UTF8ToUTF16(kZipCodeRe);
+ string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe);
if (!ParseField(scanner, pattern, &address_field->zip_))
return false;
@@ -457,7 +269,7 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner,
// Look for a zip+4, whose field name will also often contain
// the substring "zip".
ParseField(scanner,
- UTF8ToUTF16(kZip4Re),
+ UTF8ToUTF16(autofill::kZip4Re),
&address_field->zip4_);
return true;
@@ -473,7 +285,7 @@ bool AddressField::ParseCity(AutofillScanner* scanner,
// Select fields are allowed here. This occurs on top-100 site rediff.com.
return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCityRe),
+ UTF8ToUTF16(autofill::kCityRe),
MATCH_DEFAULT | MATCH_SELECT,
&address_field->city_);
}
@@ -485,15 +297,16 @@ bool AddressField::ParseState(AutofillScanner* scanner,
return false;
return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kStateRe),
+ UTF8ToUTF16(autofill::kStateRe),
MATCH_DEFAULT | MATCH_SELECT,
&address_field->state_);
}
AddressField::AddressType AddressField::AddressTypeFromText(
const string16 &text) {
- if (text.find(UTF8ToUTF16(kAddressTypeSameAsRe)) != string16::npos ||
- text.find(UTF8ToUTF16(kAddressTypeUseMyRe)) != string16::npos)
+ size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe));
+ size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe));
+ if (same_as != string16::npos || use_shipping != string16::npos)
// This text could be a checkbox label such as "same as my billing
// address" or "use my shipping address".
// ++ It would help if we generally skipped all text that appears
@@ -503,8 +316,8 @@ AddressField::AddressType AddressField::AddressTypeFromText(
// Not all pages say "billing address" and "shipping address" explicitly;
// for example, Craft Catalog1.html has "Bill-to Address" and
// "Ship-to Address".
- size_t bill = text.rfind(UTF8ToUTF16(kBillingDesignatorRe));
- size_t ship = text.rfind(UTF8ToUTF16(kShippingDesignatorRe));
+ size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe));
+ size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe));
if (bill == string16::npos && ship == string16::npos)
return kGenericAddress;
« no previous file with comments | « build/escape_unicode.py ('k') | chrome/browser/autofill/autofill_regex_constants.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698