Index: chrome/browser/autofill/phone_field.cc |
=================================================================== |
--- chrome/browser/autofill/phone_field.cc (revision 74701) |
+++ chrome/browser/autofill/phone_field.cc (working copy) |
@@ -15,6 +15,76 @@ |
#include "grit/autofill_resources.h" |
#include "ui/base/l10n/l10n_util.h" |
+// Phone field grammars - first matched grammar will be parsed. Grammars are |
+// separated by { REGEX_SEPARATOR, 0, 0 }. Suffix and extension are parsed |
+// separately unless they are a necessary part of the match. Each entry is { regex, field_id, max_size }; a max_size of 0 skips the max_length() check. |
+PhoneField::Parser PhoneField::phone_field_grammars_[] = { |
+ // Country code: CCFIELD Area Code: ACFIELD Phone: PHONE (- SUFFIX (- EXT)?)? |
+ { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
dhollowa
2011/02/16 00:25:37
The second zero should be |FIELD_NULL| and through
Ilya Sherman
2011/02/16 09:33:22
I assume you mean the first zero?
dhollowa
2011/02/16 16:40:03
Yes.
On 2011/02/16 09:33:22, Ilya Sherman wrote:
GeorgeY
2011/02/16 20:53:34
Changed to FIELD_NONE
GeorgeY
2011/02/16 20:53:34
Yes, fixed
|
+ // Phone: CCFIELD ACFIELD:3 - PHONE:3 - SUFFIX:4 (Ext: EXT)? |
Ilya Sherman
2011/02/16 09:33:22
I'm trying to understand the comment structure you
GeorgeY
2011/02/16 20:53:34
typo - fixed.
|
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: CCFIELD:3 ACFIELD:3 PHONE:3 SUFFIX:4 (- EXT)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Area Code: ACFIELD Phone: PHONE (- SUFFIX (- EXT)?)? |
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: ACFIELD PHONE:3 SUFFIX:4 (- EXT)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: CCFIELD \( ACFIELD \) PHONE (- SUFFIX (- EXT)?)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: \( ACFIELD \) PHONE (- SUFFIX (- EXT)?)? NOTE(review): the three entries below are identical to the previous grammar (they begin with FIELD_COUNTRY_CODE), so presumably they can never match when it did not -- confirm whether the country-code entry is intended. |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: CCFIELD - ACFIELD - PHONE - SUFFIX (Ext: EXT)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: ACFIELD Prefix: PHONE Suffix: SUFFIX (Ext: EXT)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: ACFIELD - PHONE:3 - SUFFIX:4 (Ext: EXT)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: CCFIELD - ACFIELD - PHONE (Ext: EXT)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
+ { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: ACFIELD - PHONE (- SUFFIX (- EXT)?)? NOTE(review): the two entries below repeat the "Area Code: ACFIELD Phone: PHONE" grammar earlier in this table -- confirm the intended regexes. |
+ { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+ // Phone: PHONE (- SUFFIX (- EXT)?)? |
+ { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
+ { PhoneField::REGEX_SEPARATOR, 0, 0 }, |
+}; |
Ilya Sherman
2011/02/16 09:33:22
I'm finding this list pretty hard to grok. Do you
GeorgeY
2011/02/16 20:53:34
Probably would be difficult to generate them progr
|
+ |
// static |
PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter, |
bool is_ecml) { |
@@ -47,7 +117,7 @@ |
AutoFillField* field; |
if (ParseText(iter, pattern, &field)) { |
PhoneField* phone_field = new PhoneField(); |
- phone_field->phone_ = field; |
+ phone_field->parsed_phone_fields_[FIELD_PHONE] = field; |
return phone_field; |
} |
@@ -55,35 +125,42 @@ |
} |
bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { |
- bool ok; |
+ bool ok = false; |
- if (area_code_ != NULL) { |
- ok = Add(field_type_map, area_code_, |
- AutoFillType(number_->GetCityCodeType())); |
- DCHECK(ok); |
+ DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Fires if no phone field was parsed. |
Ilya Sherman
2011/02/16 09:33:22
nit: I think you mean "was _correctly_ parsed"?
GeorgeY
2011/02/16 20:53:34
I wrote it for the case when the DCHECK hits :). As we should encomp
|
- if (prefix_ != NULL) { |
- // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form |
- // we fill only the prefix depending on the size of the input field. |
- ok = ok && Add(field_type_map, |
- prefix_, |
- AutoFillType(number_->GetNumberType())); |
+ if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) || |
+ (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) || |
+ (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) { |
+ if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) { |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_COUNTRY_CODE], |
+ AutoFillType(number_->GetCountryCodeType())); |
DCHECK(ok); |
- // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form |
- // we fill only the suffix depending on the size of the input field. |
- ok = ok && Add(field_type_map, |
- phone_, |
- AutoFillType(number_->GetNumberType())); |
+ } |
+ if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) { |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_AREA_CODE], |
+ AutoFillType(number_->GetCityCodeType())); |
DCHECK(ok); |
- } else { |
- ok = ok && Add(field_type_map, |
- phone_, |
- AutoFillType(number_->GetNumberType())); |
+ } |
+ // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form |
+ // we fill only the prefix depending on the size of the input field. |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_PHONE], |
+ AutoFillType(number_->GetNumberType())); |
+ DCHECK(ok); |
+ // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form |
+ // we fill only the suffix depending on the size of the input field. |
+ if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) { |
+ ok = Add(field_type_map, |
+ parsed_phone_fields_[FIELD_SUFFIX], |
+ AutoFillType(number_->GetNumberType())); |
DCHECK(ok); |
} |
} else { |
ok = Add(field_type_map, |
- phone_, |
+ parsed_phone_fields_[FIELD_PHONE], |
AutoFillType(number_->GetWholeNumberType())); |
DCHECK(ok); |
} |
@@ -91,19 +168,29 @@ |
return ok; |
} |
-PhoneField::PhoneField() |
- : phone_(NULL), |
- area_code_(NULL), |
- prefix_(NULL), |
- extension_(NULL) { |
+// Creates a PhoneField with every parsed-field slot cleared and the phone |
+// type set to HOME_PHONE. |
+PhoneField::PhoneField() { |
+ // Take the byte count from the array itself so the clear cannot fall out of |
+ // sync with the member's declaration (assumes parsed_phone_fields_ is |
+ // declared as an array, not a pointer). |
+ memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_)); |
dhollowa
2011/02/16 00:25:37
How about |sizeof(parsed_phone_fields_)| instead?
GeorgeY
2011/02/16 20:53:34
:)
Done. The vector is not there because: 1. The s
Ilya Sherman
2011/02/16 23:22:08
Please remind me: Who owns the pointers? Do we cl
|
SetPhoneType(HOME_PHONE); |
} |
+string16 PhoneField::GetCountryRegex() const { |
+ // Country-code pattern resource; phone_type_ is not consulted, so Home and Fax numbers share it. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE); |
+} |
+ |
string16 PhoneField::GetAreaRegex() const { |
// Same for Home and Fax numbers (phone_type_ is not consulted). The result matches either the area-code pattern or the no-text area-code pattern. |
- return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); |
+ string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); |
+ area_code.append(ASCIIToUTF16("|")); // Regexp alternation joining the two patterns. |
+ area_code.append(GetAreaNoTextRegex()); |
+ return area_code; |
} |
+string16 PhoneField::GetAreaNoTextRegex() const { |
+ // No-text area-code pattern resource; phone_type_ is not consulted, so Home and Fax numbers share it. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE); |
+} |
+ |
string16 PhoneField::GetPhoneRegex() const { |
if (phone_type_ == HOME_PHONE) |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_RE); |
@@ -114,11 +201,21 @@ |
return string16(); |
} |
+string16 PhoneField::GetPrefixSeparatorRegex() const { |
+ // Prefix-separator pattern resource; phone_type_ is not consulted, so Home and Fax numbers share it. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE); |
+} |
+ |
string16 PhoneField::GetPrefixRegex() const { |
// Prefix pattern resource; phone_type_ is not consulted, so Home and Fax numbers share it. |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE); |
} |
+string16 PhoneField::GetSuffixSeparatorRegex() const { |
+ // Suffix-separator pattern resource; phone_type_ is not consulted, so Home and Fax numbers share it. |
+ return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE); |
+} |
+ |
string16 PhoneField::GetSuffixRegex() const { |
// This one is the same for Home and Fax numbers. |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE); |
@@ -129,6 +226,24 @@ |
return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE); |
} |
+string16 PhoneField::GetRegExp(REGEX_MATCH regex_id) const { /* Maps a grammar regex id to the string returned by the corresponding Get*Regex() accessor. */ |
+ switch (regex_id) { |
+ case REGEX_COUNTRY: return GetCountryRegex(); |
+ case REGEX_AREA: return GetAreaRegex(); |
+ case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex(); |
+ case REGEX_PHONE: return GetPhoneRegex(); |
+ case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex(); |
+ case REGEX_PREFIX: return GetPrefixRegex(); |
+ case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex(); |
+ case REGEX_SUFFIX: return GetSuffixRegex(); |
+ case REGEX_EXTENSION: return GetExtensionRegex(); |
+ default: |
+ NOTREACHED(); /* Ids without a case above are unexpected here; they fall through to the empty string below. */ |
+ break; |
+ } |
+ return string16(); |
+} |
+ |
// static |
bool PhoneField::ParseInternal( |
PhoneField *phone_field, |
@@ -141,79 +256,54 @@ |
return false; |
std::vector<AutoFillField*>::const_iterator q = *iter; |
+ |
// The form owns the following variables, so they should not be deleted. |
- AutoFillField* phone = NULL; |
- AutoFillField* phone2 = NULL; |
- AutoFillField* phone3 = NULL; |
- bool area_code = false; // true if we've parsed an area code field. |
+ AutoFillField* parsed_fields[FIELD_MAX]; |
- // Some pages, such as BloomingdalesShipping.html, have a field labeled |
- // "Area Code and Phone"; we want to parse this as a phone number field so |
- // we look for "phone" before we look for "area code". |
- if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) { |
- area_code = false; |
- // Check the case when the match is for non-home phone and area code, e.g. |
- // first field is a "Fax area code" and the subsequent is "Fax phone". |
- if (!regular_phone) { |
- // Attempt parsing of the same field as an area code and then phone: |
- std::vector<AutoFillField*>::const_iterator temp_it = *iter; |
- AutoFillField* tmp_phone1 = NULL; |
- AutoFillField* tmp_phone2 = NULL; |
- if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) && |
- ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) { |
- phone = tmp_phone1; |
- phone2 = tmp_phone2; |
- q = temp_it; |
- area_code = true; |
+ for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) { |
+ memset(parsed_fields, 0, sizeof(AutoFillField*) * FIELD_MAX); |
dhollowa
2011/02/16 00:25:37
sizeof(parsed_phone_fields_)
GeorgeY
2011/02/16 20:53:34
Done.
|
+ q = *iter; |
+ // Attempt to parse next possible match. |
+ for (; i < arraysize(phone_field_grammars_) && |
+ phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) { |
+ if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex), |
+ &parsed_fields[phone_field_grammars_[i].field_id])) |
+ break; |
+ if (phone_field_grammars_[i].max_size && |
+ (!parsed_fields[phone_field_grammars_[i].field_id]->max_length() || |
+ phone_field_grammars_[i].max_size < |
dhollowa
2011/02/16 00:25:37
nit: indentation looks wrong here.
GeorgeY
2011/02/16 20:53:34
Indented additional 2 spaces.
|
+ parsed_fields[phone_field_grammars_[i].field_id]->max_length())) { |
+ break; |
} |
} |
- } else { |
- if (!ParseText(&q, phone_field->GetAreaRegex(), &phone)) |
- return false; |
- area_code = true; |
- // If this is not a home phone and there was no specification before |
- // the phone number actually starts (e.g. field 1 "Area code:", field 2 |
- // "Fax:"), we skip searching for preffix and suffix and bail out. |
- if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone) |
- return false; |
+ if (i >= arraysize(phone_field_grammars_)) |
+ return false; // Parsing failed. |
+ if (phone_field_grammars_[i].regex == REGEX_SEPARATOR) |
+ break; // Parsing succeeded. |
+ do { |
+ ++i; |
+ } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR); |
} |
+ if (!parsed_fields[FIELD_PHONE]) |
+ return false; |
Ilya Sherman
2011/02/16 09:33:22
Seems like we could fill parsed_fields[FIELD_PHONE
GeorgeY
2011/02/16 20:53:34
Suffix is optional - it gets parsed iff (if and on
|
- // Sometimes phone number fields are separated by "-" (e.g. test page |
- // Crate and Barrel Check Out.html). Also, area codes are sometimes |
- // surrounded by parentheses, so a ")" may appear after the area code field. |
- // |
- // We used to match "tel" here, which we've seen in field names (e.g. on |
- // Newegg2.html), but that's too general: some pages (e.g. |
- // uk/Furniture123-1.html) have several phone numbers in succession and we |
- // don't want those to be parsed as components of a single phone number. |
- if (phone2 == NULL) |
- ParseText(&q, phone_field->GetPrefixRegex(), &phone2); |
+ for (int i = 0; i < FIELD_MAX; ++i) |
+ phone_field->parsed_phone_fields_[i] = parsed_fields[i]; |
+ // Look for optional fields. |
+ |
// Look for a third text box. |
- if (phone2) |
- ParseText(&q, phone_field->GetSuffixRegex(), &phone3); |
- |
- // Now we have one, two, or three phone number text fields. Package them |
- // up into a PhoneField object. |
- |
- if (phone2 == NULL) { // only one field |
- if (area_code) { |
- // It's an area code - it doesn't make sense. |
- return false; |
+ if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) { |
+ if (!ParseText(&q, phone_field->GetSuffixRegex(), |
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) { |
+ ParseText(&q, phone_field->GetSuffixSeparatorRegex(), |
+ &phone_field->parsed_phone_fields_[FIELD_SUFFIX]); |
} |
- phone_field->phone_ = phone; |
- } else { |
- phone_field->area_code_ = phone; |
- if (phone3 == NULL) { // two fields |
- phone_field->phone_ = phone2; |
- } else { // three boxes: area code, prefix and suffix |
- phone_field->prefix_ = phone2; |
- phone_field->phone_ = phone3; |
- } |
} |
// Now look for an extension. |
- ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_); |
+ ParseText(&q, phone_field->GetExtensionRegex(), |
+ &phone_field->parsed_phone_fields_[FIELD_EXTENSION]); |
*iter = q; |
return true; |