Index: chrome/browser/autofill/phone_field.cc |
=================================================================== |
--- chrome/browser/autofill/phone_field.cc (revision 74701) |
+++ chrome/browser/autofill/phone_field.cc (working copy) |
@@ -15,6 +15,85 @@ |
#include "grit/autofill_resources.h" |
#include "ui/base/l10n/l10n_util.h" |
+// Phone field grammars - first matched grammar will be parsed. Grammars are |
+// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are |
+// parsed separately unless they are necessary parts of the match. |
Ilya Sherman
2011/02/16 23:22:08
nit: part -> parts
GeorgeY
2011/02/17 00:21:18
Done.
|
+// The following notation is used to describe the patterns: |
Ilya Sherman
2011/02/16 23:22:08
nit: Perhaps: "The following notation is used to d
GeorgeY
2011/02/17 00:21:18
Done.
|
+// <cc> - country code field. |
+// <ac> - area code field. |
+// <phone> - phone or prefix. |
+// <suffix> - suffix. |
+// <ext> - extension. |
+// :N means field is limited to N characters, otherwise it is unlimited. |
+// (pattern <field>)? means pattern is optional and matched separately. |
Ilya Sherman
2011/02/16 23:22:08
nit: patter -> pattern, matche d -> matched
GeorgeY
2011/02/17 00:21:18
Done.
|
+PhoneField::Parser PhoneField::phone_field_grammars_[] = { |
+ // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix> |
+ // (Ext: <ext>)?)? |
+ { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)? |
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
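+ // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? |
+ // NOTE(review): the rows below are identical to the preceding grammar and |
+ // include a <cc> row that this pattern omits - confirm which is intended. |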
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <cc> - <ac> - <phone> (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
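+ // Phone: <ac> - <phone> (Ext: <ext>)? |
+ // NOTE(review): the rows below are identical to the earlier |
+ // "Area Code: <ac> Phone: <phone>" grammar - confirm the intended regexes. |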
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+ // Phone: <phone> (Ext: <ext>)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
+}; |
+ |
// static |
PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter, |
bool is_ecml) { |
@@ -31,7 +110,7 @@ |
// be the last as it is a catch all case ("fax" and "faxarea" parsed as FAX, |
// but "area" and "someotherarea" parsed as HOME, for example). |
for (int i = PHONE_TYPE_MAX - 1; i >= PHONE_TYPE_FIRST; --i) { |
- phone_field->SetPhoneType(static_cast<PhoneField::PHONE_TYPE>(i)); |
+ phone_field->SetPhoneType(static_cast<PhoneField::PhoneType>(i)); |
if (ParseInternal(phone_field.get(), iter, i == HOME_PHONE)) |
return phone_field.release(); |
} |
@@ -47,7 +126,7 @@ |
AutoFillField* field; |
if (ParseText(iter, pattern, &field)) { |
PhoneField* phone_field = new PhoneField(); |
- phone_field->phone_ = field; |
+ phone_field->parsed_phone_fields_[FIELD_PHONE] = field; |
return phone_field; |
} |
@@ -55,35 +134,42 @@ |
} |
bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { |
- bool ok; |
+ bool ok = false; |
- if (area_code_ != NULL) { |
- ok = Add(field_type_map, area_code_, |
- AutoFillType(number_->GetCityCodeType())); |
- DCHECK(ok); |
+ DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed. |
- if (prefix_ != NULL) { |
- // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form |
- // we fill only the prefix depending on the size of the input field. |
- ok = ok && Add(field_type_map, |
- prefix_, |
- AutoFillType(number_->GetNumberType())); |
+ if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) || |
+ (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) || |
+ (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) { |
+ if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) { |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_COUNTRY_CODE], |
+ AutoFillType(number_->GetCountryCodeType())); |
DCHECK(ok); |
- // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form |
- // we fill only the suffix depending on the size of the input field. |
- ok = ok && Add(field_type_map, |
- phone_, |
- AutoFillType(number_->GetNumberType())); |
+ } |
+ if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) { |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_AREA_CODE], |
+ AutoFillType(number_->GetCityCodeType())); |
DCHECK(ok); |
- } else { |
- ok = ok && Add(field_type_map, |
- phone_, |
- AutoFillType(number_->GetNumberType())); |
+ } |
+ // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form |
+ // we fill only the prefix depending on the size of the input field. |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_PHONE], |
+ AutoFillType(number_->GetNumberType())); |
+ DCHECK(ok); |
+ // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form |
+ // we fill only the suffix depending on the size of the input field. |
+ if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) { |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_SUFFIX], |
+ AutoFillType(number_->GetNumberType())); |
DCHECK(ok); |
} |
} else { |
ok = Add(field_type_map, |
- phone_, |
+ parsed_phone_fields_[FIELD_PHONE], |
AutoFillType(number_->GetWholeNumberType())); |
DCHECK(ok); |
} |
@@ -91,19 +177,29 @@ |
return ok; |
} |
-PhoneField::PhoneField() |
- : phone_(NULL), |
- area_code_(NULL), |
- prefix_(NULL), |
- extension_(NULL) { |
+PhoneField::PhoneField() { |
+ memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_)); |
SetPhoneType(HOME_PHONE); |
} |
+string16 PhoneField::GetCountryRegex() const { |
+ // This one is the same for Home and Fax numbers. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE); |
+} |
+ |
string16 PhoneField::GetAreaRegex() const { |
// This one is the same for Home and Fax numbers. |
- return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); |
+ string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); |
+ area_code.append(ASCIIToUTF16("|")); // Regexp separator. |
+ area_code.append(GetAreaNoTextRegex()); |
+ return area_code; |
} |
+string16 PhoneField::GetAreaNoTextRegex() const { |
+ // This one is the same for Home and Fax numbers. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE); |
+} |
+ |
string16 PhoneField::GetPhoneRegex() const { |
if (phone_type_ == HOME_PHONE) |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_RE); |
@@ -114,11 +210,21 @@ |
return string16(); |
} |
+string16 PhoneField::GetPrefixSeparatorRegex() const { |
+ // This one is the same for Home and Fax numbers. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE); |
+} |
+ |
string16 PhoneField::GetPrefixRegex() const { |
// This one is the same for Home and Fax numbers. |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE); |
} |
+string16 PhoneField::GetSuffixSeparatorRegex() const { |
+ // This one is the same for Home and Fax numbers. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE); |
+} |
+ |
string16 PhoneField::GetSuffixRegex() const { |
// This one is the same for Home and Fax numbers. |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE); |
@@ -129,6 +235,24 @@ |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE); |
} |
+string16 PhoneField::GetRegExp(RegexType regex_id) const { |
+ switch (regex_id) { |
+ case REGEX_COUNTRY: return GetCountryRegex(); |
+ case REGEX_AREA: return GetAreaRegex(); |
+ case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex(); |
+ case REGEX_PHONE: return GetPhoneRegex(); |
+ case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex(); |
+ case REGEX_PREFIX: return GetPrefixRegex(); |
+ case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex(); |
+ case REGEX_SUFFIX: return GetSuffixRegex(); |
+ case REGEX_EXTENSION: return GetExtensionRegex(); |
+ default: |
+ NOTREACHED(); |
+ break; |
+ } |
+ return string16(); |
+} |
+ |
// static |
bool PhoneField::ParseInternal( |
PhoneField *phone_field, |
@@ -141,85 +265,60 @@ |
return false; |
std::vector<AutoFillField*>::const_iterator q = *iter; |
+ |
// The form owns the following variables, so they should not be deleted. |
- AutoFillField* phone = NULL; |
- AutoFillField* phone2 = NULL; |
- AutoFillField* phone3 = NULL; |
- bool area_code = false; // true if we've parsed an area code field. |
+ AutoFillField* parsed_fields[FIELD_MAX]; |
- // Some pages, such as BloomingdalesShipping.html, have a field labeled |
- // "Area Code and Phone"; we want to parse this as a phone number field so |
- // we look for "phone" before we look for "area code". |
- if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) { |
- area_code = false; |
- // Check the case when the match is for non-home phone and area code, e.g. |
- // first field is a "Fax area code" and the subsequent is "Fax phone". |
- if (!regular_phone) { |
- // Attempt parsing of the same field as an area code and then phone: |
- std::vector<AutoFillField*>::const_iterator temp_it = *iter; |
- AutoFillField* tmp_phone1 = NULL; |
- AutoFillField* tmp_phone2 = NULL; |
- if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) && |
- ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) { |
- phone = tmp_phone1; |
- phone2 = tmp_phone2; |
- q = temp_it; |
- area_code = true; |
+ for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) { |
+ memset(parsed_fields, 0, sizeof(parsed_fields)); |
+ q = *iter; |
+ // Attempt to parse next possible match. |
+ for (; i < arraysize(phone_field_grammars_) && |
+ phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) { |
+ if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex), |
+ &parsed_fields[phone_field_grammars_[i].phone_part])) |
+ break; |
+ if (phone_field_grammars_[i].max_size && |
+ (!parsed_fields[phone_field_grammars_[i].phone_part]->max_length() || |
+ phone_field_grammars_[i].max_size < |
+ parsed_fields[phone_field_grammars_[i].phone_part]->max_length())) { |
+ break; |
} |
} |
- } else { |
- if (!ParseText(&q, phone_field->GetAreaRegex(), &phone)) |
- return false; |
- area_code = true; |
- // If this is not a home phone and there was no specification before |
- // the phone number actually starts (e.g. field 1 "Area code:", field 2 |
- // "Fax:"), we skip searching for preffix and suffix and bail out. |
- if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone) |
- return false; |
+ if (i >= arraysize(phone_field_grammars_)) |
+ return false; // Parsing failed. |
+ if (phone_field_grammars_[i].regex == REGEX_SEPARATOR) |
+ break; // Parsing succeeded. |
+ do { |
+ ++i; |
+ } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR); |
} |
+ if (!parsed_fields[FIELD_PHONE]) |
+ return false; |
Ilya Sherman
2011/02/16 23:22:08
Suppose the final pattern were "// Phone: <ac> - <
GeorgeY
2011/02/17 00:21:18
Nope. We will not get here until we have a complet
Ilya Sherman
2011/02/17 00:35:12
But there is no next pattern after the final patte
GeorgeY
2011/02/17 01:12:24
After the last pattern we bail out of the function
|
- // Sometimes phone number fields are separated by "-" (e.g. test page |
- // Crate and Barrel Check Out.html). Also, area codes are sometimes |
- // surrounded by parentheses, so a ")" may appear after the area code field. |
- // |
- // We used to match "tel" here, which we've seen in field names (e.g. on |
- // Newegg2.html), but that's too general: some pages (e.g. |
- // uk/Furniture123-1.html) have several phone numbers in succession and we |
- // don't want those to be parsed as components of a single phone number. |
- if (phone2 == NULL) |
- ParseText(&q, phone_field->GetPrefixRegex(), &phone2); |
+ for (int i = 0; i < FIELD_MAX; ++i) |
+ phone_field->parsed_phone_fields_[i] = parsed_fields[i]; |
+ // Look for optional fields. |
+ |
// Look for a third text box. |
- if (phone2) |
- ParseText(&q, phone_field->GetSuffixRegex(), &phone3); |
- |
- // Now we have one, two, or three phone number text fields. Package them |
- // up into a PhoneField object. |
- |
- if (phone2 == NULL) { // only one field |
- if (area_code) { |
- // It's an area code - it doesn't make sense. |
- return false; |
+ if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) { |
+ if (!ParseText(&q, phone_field->GetSuffixRegex(), |
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) { |
+ ParseText(&q, phone_field->GetSuffixSeparatorRegex(), |
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX]); |
} |
- phone_field->phone_ = phone; |
- } else { |
- phone_field->area_code_ = phone; |
- if (phone3 == NULL) { // two fields |
- phone_field->phone_ = phone2; |
- } else { // three boxes: area code, prefix and suffix |
- phone_field->prefix_ = phone2; |
- phone_field->phone_ = phone3; |
- } |
} |
// Now look for an extension. |
- ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_); |
+ ParseText(&q, phone_field->GetExtensionRegex(), |
+ &phone_field->parsed_phone_fields_[FIELD_EXTENSION]); |
*iter = q; |
return true; |
} |
-void PhoneField::SetPhoneType(PHONE_TYPE phone_type) { |
+void PhoneField::SetPhoneType(PhoneType phone_type) { |
// Field types are different as well, so we create a temporary phone number, |
// to get relevant field types. |
if (phone_type == HOME_PHONE) |