Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4875)

Unified Diff: chrome/browser/autofill/phone_field.cc

Issue 6480083: Changed parsing code for the phonenumbers fields to incorporate different com... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « chrome/browser/autofill/phone_field.h ('k') | chrome/browser/autofill/phone_field_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/autofill/phone_field.cc
===================================================================
--- chrome/browser/autofill/phone_field.cc (revision 74701)
+++ chrome/browser/autofill/phone_field.cc (working copy)
@@ -15,6 +15,85 @@
#include "grit/autofill_resources.h"
#include "ui/base/l10n/l10n_util.h"
+// Phone field grammars - first matched grammar will be parsed. Grammars are
+// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
+// parsed separately unless they are necessary parts of the match.
+// The following notation is used to describe the patterns:
+// <cc> - country code field.
+// <ac> - area code field.
+// <phone> - phone or prefix.
+// <suffix> - suffix.
+// <ext> - extension.
+// :N means field is limited to N characters, otherwise it is unlimited.
+// (pattern <field>)? means pattern is optional and matched separately.
+PhoneField::Parser PhoneField::phone_field_grammars_[] = {
+ // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
+ // (Ext: <ext>)?)?
+ { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
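+ // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
+ // NOTE(review): the three entries below are identical to the
+ // "<cc> \( <ac> \) <phone>" grammar directly above (they still start with a
+ // FIELD_COUNTRY_CODE entry), so this grammar appears unable to match
+ // anything the previous one did not - confirm the intended entries.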
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
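+ // Phone: <ac> - <phone> (Ext: <ext>)?
+ // NOTE(review): the two entries below (REGEX_AREA, then REGEX_PHONE) repeat
+ // the "Area Code: <ac> Phone: <phone>" grammar earlier in this table, and
+ // neither uses a separator regex as the pattern above suggests - confirm
+ // the intended entries.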
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+ // Phone: <phone> (Ext: <ext>)?
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 },
+};
+
// static
PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter,
bool is_ecml) {
@@ -31,7 +110,7 @@
// be the last as it is a catch all case ("fax" and "faxarea" parsed as FAX,
// but "area" and "someotherarea" parsed as HOME, for example).
for (int i = PHONE_TYPE_MAX - 1; i >= PHONE_TYPE_FIRST; --i) {
- phone_field->SetPhoneType(static_cast<PhoneField::PHONE_TYPE>(i));
+ phone_field->SetPhoneType(static_cast<PhoneField::PhoneType>(i));
if (ParseInternal(phone_field.get(), iter, i == HOME_PHONE))
return phone_field.release();
}
@@ -47,7 +126,7 @@
AutoFillField* field;
if (ParseText(iter, pattern, &field)) {
PhoneField* phone_field = new PhoneField();
- phone_field->phone_ = field;
+ phone_field->parsed_phone_fields_[FIELD_PHONE] = field;
return phone_field;
}
@@ -55,35 +134,42 @@
}
bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const {
- bool ok;
+ bool ok = false;
- if (area_code_ != NULL) {
- ok = Add(field_type_map, area_code_,
- AutoFillType(number_->GetCityCodeType()));
- DCHECK(ok);
+ DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed.
- if (prefix_ != NULL) {
- // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
- // we fill only the prefix depending on the size of the input field.
- ok = ok && Add(field_type_map,
- prefix_,
- AutoFillType(number_->GetNumberType()));
+ if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) ||
+ (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) ||
+ (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) {
+ if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_COUNTRY_CODE],
+ AutoFillType(number_->GetCountryCodeType()));
DCHECK(ok);
- // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
- // we fill only the suffix depending on the size of the input field.
- ok = ok && Add(field_type_map,
- phone_,
- AutoFillType(number_->GetNumberType()));
+ }
+ if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_AREA_CODE],
+ AutoFillType(number_->GetCityCodeType()));
DCHECK(ok);
- } else {
- ok = ok && Add(field_type_map,
- phone_,
- AutoFillType(number_->GetNumberType()));
+ }
+ // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
+ // we fill only the prefix depending on the size of the input field.
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_PHONE],
+ AutoFillType(number_->GetNumberType()));
+ DCHECK(ok);
+ // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
+ // we fill only the suffix depending on the size of the input field.
+ if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) {
+ ok = Add(field_type_map,
+ parsed_phone_fields_[FIELD_SUFFIX],
+ AutoFillType(number_->GetNumberType()));
DCHECK(ok);
}
} else {
ok = Add(field_type_map,
- phone_,
+ parsed_phone_fields_[FIELD_PHONE],
AutoFillType(number_->GetWholeNumberType()));
DCHECK(ok);
}
@@ -91,19 +177,29 @@
return ok;
}
-PhoneField::PhoneField()
- : phone_(NULL),
- area_code_(NULL),
- prefix_(NULL),
- extension_(NULL) {
+PhoneField::PhoneField() {
+ memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
SetPhoneType(HOME_PHONE);
}
+string16 PhoneField::GetCountryRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE);
+}
+
string16 PhoneField::GetAreaRegex() const {
// This one is the same for Home and Fax numbers.
- return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE);
+ string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE);
+ area_code.append(ASCIIToUTF16("|")); // Regexp separator.
+ area_code.append(GetAreaNoTextRegex());
+ return area_code;
}
+string16 PhoneField::GetAreaNoTextRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE);
+}
+
string16 PhoneField::GetPhoneRegex() const {
if (phone_type_ == HOME_PHONE)
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_RE);
@@ -114,11 +210,21 @@
return string16();
}
+string16 PhoneField::GetPrefixSeparatorRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE);
+}
+
string16 PhoneField::GetPrefixRegex() const {
// This one is the same for Home and Fax numbers.
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE);
}
+string16 PhoneField::GetSuffixSeparatorRegex() const {
+ // This one is the same for Home and Fax numbers.
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE);
+}
+
string16 PhoneField::GetSuffixRegex() const {
// This one is the same for Home and Fax numbers.
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE);
@@ -129,6 +235,24 @@
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE);
}
+string16 PhoneField::GetRegExp(RegexType regex_id) const {
+ switch (regex_id) {
+ case REGEX_COUNTRY: return GetCountryRegex();
+ case REGEX_AREA: return GetAreaRegex();
+ case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex();
+ case REGEX_PHONE: return GetPhoneRegex();
+ case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex();
+ case REGEX_PREFIX: return GetPrefixRegex();
+ case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex();
+ case REGEX_SUFFIX: return GetSuffixRegex();
+ case REGEX_EXTENSION: return GetExtensionRegex();
+ default:
+ NOTREACHED();
+ break;
+ }
+ return string16();
+}
+
// static
bool PhoneField::ParseInternal(
PhoneField *phone_field,
@@ -141,85 +265,60 @@
return false;
std::vector<AutoFillField*>::const_iterator q = *iter;
+
// The form owns the following variables, so they should not be deleted.
- AutoFillField* phone = NULL;
- AutoFillField* phone2 = NULL;
- AutoFillField* phone3 = NULL;
- bool area_code = false; // true if we've parsed an area code field.
+ AutoFillField* parsed_fields[FIELD_MAX];
- // Some pages, such as BloomingdalesShipping.html, have a field labeled
- // "Area Code and Phone"; we want to parse this as a phone number field so
- // we look for "phone" before we look for "area code".
- if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) {
- area_code = false;
- // Check the case when the match is for non-home phone and area code, e.g.
- // first field is a "Fax area code" and the subsequent is "Fax phone".
- if (!regular_phone) {
- // Attempt parsing of the same field as an area code and then phone:
- std::vector<AutoFillField*>::const_iterator temp_it = *iter;
- AutoFillField* tmp_phone1 = NULL;
- AutoFillField* tmp_phone2 = NULL;
- if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) &&
- ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) {
- phone = tmp_phone1;
- phone2 = tmp_phone2;
- q = temp_it;
- area_code = true;
+ for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) {
+ memset(parsed_fields, 0, sizeof(parsed_fields));
+ q = *iter;
+ // Attempt to parse next possible match.
+ for (; i < arraysize(phone_field_grammars_) &&
+ phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) {
+ if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex),
+ &parsed_fields[phone_field_grammars_[i].phone_part]))
+ break;
+ if (phone_field_grammars_[i].max_size &&
+ (!parsed_fields[phone_field_grammars_[i].phone_part]->max_length() ||
+ phone_field_grammars_[i].max_size <
+ parsed_fields[phone_field_grammars_[i].phone_part]->max_length())) {
+ break;
}
}
- } else {
- if (!ParseText(&q, phone_field->GetAreaRegex(), &phone))
- return false;
- area_code = true;
- // If this is not a home phone and there was no specification before
- // the phone number actually starts (e.g. field 1 "Area code:", field 2
- // "Fax:"), we skip searching for preffix and suffix and bail out.
- if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone)
- return false;
+ if (i >= arraysize(phone_field_grammars_))
+ return false; // Parsing failed.
+ if (phone_field_grammars_[i].regex == REGEX_SEPARATOR)
+ break; // Parsing succeeded.
+ do {
+ ++i;
+ } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR);
}
+ if (!parsed_fields[FIELD_PHONE])
+ return false;
- // Sometimes phone number fields are separated by "-" (e.g. test page
- // Crate and Barrel Check Out.html). Also, area codes are sometimes
- // surrounded by parentheses, so a ")" may appear after the area code field.
- //
- // We used to match "tel" here, which we've seen in field names (e.g. on
- // Newegg2.html), but that's too general: some pages (e.g.
- // uk/Furniture123-1.html) have several phone numbers in succession and we
- // don't want those to be parsed as components of a single phone number.
- if (phone2 == NULL)
- ParseText(&q, phone_field->GetPrefixRegex(), &phone2);
+ for (int i = 0; i < FIELD_MAX; ++i)
+ phone_field->parsed_phone_fields_[i] = parsed_fields[i];
+ // Look for optional fields.
+
// Look for a third text box.
- if (phone2)
- ParseText(&q, phone_field->GetSuffixRegex(), &phone3);
-
- // Now we have one, two, or three phone number text fields. Package them
- // up into a PhoneField object.
-
- if (phone2 == NULL) { // only one field
- if (area_code) {
- // It's an area code - it doesn't make sense.
- return false;
+ if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
+ if (!ParseText(&q, phone_field->GetSuffixRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
+ ParseText(&q, phone_field->GetSuffixSeparatorRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
}
- phone_field->phone_ = phone;
- } else {
- phone_field->area_code_ = phone;
- if (phone3 == NULL) { // two fields
- phone_field->phone_ = phone2;
- } else { // three boxes: area code, prefix and suffix
- phone_field->prefix_ = phone2;
- phone_field->phone_ = phone3;
- }
}
// Now look for an extension.
- ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_);
+ ParseText(&q, phone_field->GetExtensionRegex(),
+ &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
*iter = q;
return true;
}
-void PhoneField::SetPhoneType(PHONE_TYPE phone_type) {
+void PhoneField::SetPhoneType(PhoneType phone_type) {
// Field types are different as well, so we create a temporary phone number,
// to get relevant field types.
if (phone_type == HOME_PHONE)
« no previous file with comments | « chrome/browser/autofill/phone_field.h ('k') | chrome/browser/autofill/phone_field_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698