Chromium Code Reviews
| Index: chrome/browser/autofill/phone_field.cc |
| =================================================================== |
| --- chrome/browser/autofill/phone_field.cc (revision 74701) |
| +++ chrome/browser/autofill/phone_field.cc (working copy) |
| @@ -15,6 +15,85 @@ |
| #include "grit/autofill_resources.h" |
| #include "ui/base/l10n/l10n_util.h" |
| +// Phone field grammars - first matched grammar will be parsed. Grammars are |
| +// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are |
| +// parsed separately unless they are necessary part of the match. |
|
Ilya Sherman
2011/02/16 23:22:08
nit: part -> parts
GeorgeY
2011/02/17 00:21:18
Done.
|
| +// The following comment are indicating the matched pattern: |
|
Ilya Sherman
2011/02/16 23:22:08
nit: Perhaps: "The following notation is used to d...
GeorgeY
2011/02/17 00:21:18
Done.
|
| +// <cc> - country code field. |
| +// <ac> - area code field. |
| +// <phone> - phone or prefix. |
| +// <suffix> - suffix. |
| +// <ext> - extension. |
| +// :N means field is limited to N characters, otherwise it is unlimited. |
| +// (pattern <field>)? means patter is optional and matche d separately. |
|
Ilya Sherman
2011/02/16 23:22:08
nit: patter -> pattern, matche d -> matched
GeorgeY
2011/02/17 00:21:18
Done.
|
| +PhoneField::Parser PhoneField::phone_field_grammars_[] = { |
| + // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix> |
| + // (Ext: <ext>)?)? |
| + { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
| + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, |
| + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, |
| + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)? |
| + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
| + { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
| + { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
| + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 }, |
| + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <cc> - <ac> - <phone> (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 }, |
| + { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <ac> - <phone> (Ext: <ext>)? |
| + { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 }, |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| + // Phone: <phone> (Ext: <ext>)? |
| + { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 }, |
| + { PhoneField::REGEX_SEPARATOR, FIELD_NONE, 0 }, |
| +}; |
| + |
| // static |
| PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter, |
| bool is_ecml) { |
| @@ -31,7 +110,7 @@ |
| // be the last as it is a catch all case ("fax" and "faxarea" parsed as FAX, |
| // but "area" and "someotherarea" parsed as HOME, for example). |
| for (int i = PHONE_TYPE_MAX - 1; i >= PHONE_TYPE_FIRST; --i) { |
| - phone_field->SetPhoneType(static_cast<PhoneField::PHONE_TYPE>(i)); |
| + phone_field->SetPhoneType(static_cast<PhoneField::PhoneType>(i)); |
| if (ParseInternal(phone_field.get(), iter, i == HOME_PHONE)) |
| return phone_field.release(); |
| } |
| @@ -47,7 +126,7 @@ |
| AutoFillField* field; |
| if (ParseText(iter, pattern, &field)) { |
| PhoneField* phone_field = new PhoneField(); |
| - phone_field->phone_ = field; |
| + phone_field->parsed_phone_fields_[FIELD_PHONE] = field; |
| return phone_field; |
| } |
| @@ -55,35 +134,42 @@ |
| } |
| bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { |
| - bool ok; |
| + bool ok = false; |
| - if (area_code_ != NULL) { |
| - ok = Add(field_type_map, area_code_, |
| - AutoFillType(number_->GetCityCodeType())); |
| - DCHECK(ok); |
| + DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed. |
| - if (prefix_ != NULL) { |
| - // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form |
| - // we fill only the prefix depending on the size of the input field. |
| - ok = ok && Add(field_type_map, |
| - prefix_, |
| - AutoFillType(number_->GetNumberType())); |
| + if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) || |
| + (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) || |
| + (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) { |
| + if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) { |
| + ok = Add(field_type_map, |
| + parsed_phone_fields_[FIELD_COUNTRY_CODE], |
| + AutoFillType(number_->GetCountryCodeType())); |
| DCHECK(ok); |
| - // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form |
| - // we fill only the suffix depending on the size of the input field. |
| - ok = ok && Add(field_type_map, |
| - phone_, |
| - AutoFillType(number_->GetNumberType())); |
| + } |
| + if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) { |
| + ok = Add(field_type_map, |
| + parsed_phone_fields_[FIELD_AREA_CODE], |
| + AutoFillType(number_->GetCityCodeType())); |
| DCHECK(ok); |
| - } else { |
| - ok = ok && Add(field_type_map, |
| - phone_, |
| - AutoFillType(number_->GetNumberType())); |
| + } |
| + // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form |
| + // we fill only the prefix depending on the size of the input field. |
| + ok = Add(field_type_map, |
| + parsed_phone_fields_[FIELD_PHONE], |
| + AutoFillType(number_->GetNumberType())); |
| + DCHECK(ok); |
| + // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form |
| + // we fill only the suffix depending on the size of the input field. |
| + if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) { |
| + ok = Add(field_type_map, |
| + parsed_phone_fields_[FIELD_SUFFIX], |
| + AutoFillType(number_->GetNumberType())); |
| DCHECK(ok); |
| } |
| } else { |
| ok = Add(field_type_map, |
| - phone_, |
| + parsed_phone_fields_[FIELD_PHONE], |
| AutoFillType(number_->GetWholeNumberType())); |
| DCHECK(ok); |
| } |
| @@ -91,19 +177,29 @@ |
| return ok; |
| } |
| -PhoneField::PhoneField() |
| - : phone_(NULL), |
| - area_code_(NULL), |
| - prefix_(NULL), |
| - extension_(NULL) { |
| +PhoneField::PhoneField() { |
| + memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_)); |
| SetPhoneType(HOME_PHONE); |
| } |
| +string16 PhoneField::GetCountryRegex() const { |
| + // This one is the same for Home and Fax numbers. |
| + return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE); |
| +} |
| + |
| string16 PhoneField::GetAreaRegex() const { |
| // This one is the same for Home and Fax numbers. |
| - return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); |
| + string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); |
| + area_code.append(ASCIIToUTF16("|")); // Regexp separator. |
| + area_code.append(GetAreaNoTextRegex()); |
| + return area_code; |
| } |
| +string16 PhoneField::GetAreaNoTextRegex() const { |
| + // This one is the same for Home and Fax numbers. |
| + return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE); |
| +} |
| + |
| string16 PhoneField::GetPhoneRegex() const { |
| if (phone_type_ == HOME_PHONE) |
| return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_RE); |
| @@ -114,11 +210,21 @@ |
| return string16(); |
| } |
| +string16 PhoneField::GetPrefixSeparatorRegex() const { |
| + // This one is the same for Home and Fax numbers. |
| + return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE); |
| +} |
| + |
| string16 PhoneField::GetPrefixRegex() const { |
| // This one is the same for Home and Fax numbers. |
| return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE); |
| } |
| +string16 PhoneField::GetSuffixSeparatorRegex() const { |
| + // This one is the same for Home and Fax numbers. |
| + return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE); |
| +} |
| + |
| string16 PhoneField::GetSuffixRegex() const { |
| // This one is the same for Home and Fax numbers. |
| return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE); |
| @@ -129,6 +235,24 @@ |
| return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE); |
| } |
| +string16 PhoneField::GetRegExp(RegexType regex_id) const { |
| + switch (regex_id) { |
| + case REGEX_COUNTRY: return GetCountryRegex(); |
| + case REGEX_AREA: return GetAreaRegex(); |
| + case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex(); |
| + case REGEX_PHONE: return GetPhoneRegex(); |
| + case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex(); |
| + case REGEX_PREFIX: return GetPrefixRegex(); |
| + case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex(); |
| + case REGEX_SUFFIX: return GetSuffixRegex(); |
| + case REGEX_EXTENSION: return GetExtensionRegex(); |
| + default: |
| + NOTREACHED(); |
| + break; |
| + } |
| + return string16(); |
| +} |
| + |
| // static |
| bool PhoneField::ParseInternal( |
| PhoneField *phone_field, |
| @@ -141,85 +265,60 @@ |
| return false; |
| std::vector<AutoFillField*>::const_iterator q = *iter; |
| + |
| // The form owns the following variables, so they should not be deleted. |
| - AutoFillField* phone = NULL; |
| - AutoFillField* phone2 = NULL; |
| - AutoFillField* phone3 = NULL; |
| - bool area_code = false; // true if we've parsed an area code field. |
| + AutoFillField* parsed_fields[FIELD_MAX]; |
| - // Some pages, such as BloomingdalesShipping.html, have a field labeled |
| - // "Area Code and Phone"; we want to parse this as a phone number field so |
| - // we look for "phone" before we look for "area code". |
| - if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) { |
| - area_code = false; |
| - // Check the case when the match is for non-home phone and area code, e.g. |
| - // first field is a "Fax area code" and the subsequent is "Fax phone". |
| - if (!regular_phone) { |
| - // Attempt parsing of the same field as an area code and then phone: |
| - std::vector<AutoFillField*>::const_iterator temp_it = *iter; |
| - AutoFillField* tmp_phone1 = NULL; |
| - AutoFillField* tmp_phone2 = NULL; |
| - if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) && |
| - ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) { |
| - phone = tmp_phone1; |
| - phone2 = tmp_phone2; |
| - q = temp_it; |
| - area_code = true; |
| + for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) { |
| + memset(parsed_fields, 0, sizeof(parsed_fields)); |
| + q = *iter; |
| + // Attempt to parse next possible match. |
| + for (; i < arraysize(phone_field_grammars_) && |
| + phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) { |
| + if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex), |
| + &parsed_fields[phone_field_grammars_[i].phone_part])) |
| + break; |
| + if (phone_field_grammars_[i].max_size && |
| + (!parsed_fields[phone_field_grammars_[i].phone_part]->max_length() || |
| + phone_field_grammars_[i].max_size < |
| + parsed_fields[phone_field_grammars_[i].phone_part]->max_length())) { |
| + break; |
| } |
| } |
| - } else { |
| - if (!ParseText(&q, phone_field->GetAreaRegex(), &phone)) |
| - return false; |
| - area_code = true; |
| - // If this is not a home phone and there was no specification before |
| - // the phone number actually starts (e.g. field 1 "Area code:", field 2 |
| - // "Fax:"), we skip searching for preffix and suffix and bail out. |
| - if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone) |
| - return false; |
| + if (i >= arraysize(phone_field_grammars_)) |
| + return false; // Parsing failed. |
| + if (phone_field_grammars_[i].regex == REGEX_SEPARATOR) |
| + break; // Parsing succeeded. |
| + do { |
| + ++i; |
| + } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR); |
| } |
| + if (!parsed_fields[FIELD_PHONE]) |
| + return false; |
|
Ilya Sherman
2011/02/16 23:22:08
Suppose the final pattern were "// Phone: <ac> - <...
GeorgeY
2011/02/17 00:21:18
Nope. We will not get here until we have a complet...
Ilya Sherman
2011/02/17 00:35:12
But there is no next pattern after the final patte...
GeorgeY
2011/02/17 01:12:24
After the last pattern we bail out of the function
|
| - // Sometimes phone number fields are separated by "-" (e.g. test page |
| - // Crate and Barrel Check Out.html). Also, area codes are sometimes |
| - // surrounded by parentheses, so a ")" may appear after the area code field. |
| - // |
| - // We used to match "tel" here, which we've seen in field names (e.g. on |
| - // Newegg2.html), but that's too general: some pages (e.g. |
| - // uk/Furniture123-1.html) have several phone numbers in succession and we |
| - // don't want those to be parsed as components of a single phone number. |
| - if (phone2 == NULL) |
| - ParseText(&q, phone_field->GetPrefixRegex(), &phone2); |
| + for (int i = 0; i < FIELD_MAX; ++i) |
| + phone_field->parsed_phone_fields_[i] = parsed_fields[i]; |
| + // Look for optional fields. |
| + |
| // Look for a third text box. |
| - if (phone2) |
| - ParseText(&q, phone_field->GetSuffixRegex(), &phone3); |
| - |
| - // Now we have one, two, or three phone number text fields. Package them |
| - // up into a PhoneField object. |
| - |
| - if (phone2 == NULL) { // only one field |
| - if (area_code) { |
| - // It's an area code - it doesn't make sense. |
| - return false; |
| + if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) { |
| + if (!ParseText(&q, phone_field->GetSuffixRegex(), |
| + &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) { |
| + ParseText(&q, phone_field->GetSuffixSeparatorRegex(), |
| + &phone_field->parsed_phone_fields_[FIELD_SUFFIX]); |
| } |
| - phone_field->phone_ = phone; |
| - } else { |
| - phone_field->area_code_ = phone; |
| - if (phone3 == NULL) { // two fields |
| - phone_field->phone_ = phone2; |
| - } else { // three boxes: area code, prefix and suffix |
| - phone_field->prefix_ = phone2; |
| - phone_field->phone_ = phone3; |
| - } |
| } |
| // Now look for an extension. |
| - ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_); |
| + ParseText(&q, phone_field->GetExtensionRegex(), |
| + &phone_field->parsed_phone_fields_[FIELD_EXTENSION]); |
| *iter = q; |
| return true; |
| } |
| -void PhoneField::SetPhoneType(PHONE_TYPE phone_type) { |
| +void PhoneField::SetPhoneType(PhoneType phone_type) { |
| // Field types are different as well, so we create a temporary phone number, |
| // to get relevant field types. |
| if (phone_type == HOME_PHONE) |