Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(716)

Side by Side Diff: chrome/browser/autofill/phone_field.cc

Issue 6480083: Changed parsing code for the phonenumbers fields to incorporate different com... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autofill/phone_field.h" 5 #include "chrome/browser/autofill/phone_field.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/scoped_ptr.h" 8 #include "base/scoped_ptr.h"
9 #include "base/string16.h" 9 #include "base/string16.h"
10 #include "base/string_util.h" 10 #include "base/string_util.h"
11 #include "base/utf_string_conversions.h" 11 #include "base/utf_string_conversions.h"
12 #include "chrome/browser/autofill/autofill_field.h" 12 #include "chrome/browser/autofill/autofill_field.h"
13 #include "chrome/browser/autofill/fax_number.h" 13 #include "chrome/browser/autofill/fax_number.h"
14 #include "chrome/browser/autofill/home_phone_number.h" 14 #include "chrome/browser/autofill/home_phone_number.h"
15 #include "grit/autofill_resources.h" 15 #include "grit/autofill_resources.h"
16 #include "ui/base/l10n/l10n_util.h" 16 #include "ui/base/l10n/l10n_util.h"
17 17
18 // Phone field grammars - first matched grammar will be parsed. Grammars are
19 // separated by { REGEX_SEPARATOR, 0, 0 }. Suffix and extension are parsed
20 // separately unless they are a necessary part of the match.
21 PhoneField::Parser PhoneField::phone_field_grammars_[] = {
22 // Country code: CCFIELD Area Code: ACFIELD Phone: PHONE (- SUFFIX (- EXT)?)?
23 { PhoneField::REGEX_COUNTRY, PhoneField::FIELD_COUNTRY_CODE, 0 },
24 { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
25 { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
26 { PhoneField::REGEX_SEPARATOR, 0, 0 },
dhollowa 2011/02/16 00:25:37 The second zero should be |FIELD_NULL| and through
Ilya Sherman 2011/02/16 09:33:22 I assume you mean the first zero?
dhollowa 2011/02/16 16:40:03 Yes. On 2011/02/16 09:33:22, Ilya Sherman wrote:
GeorgeY 2011/02/16 20:53:34 Changed to FIELD_NONE
GeorgeY 2011/02/16 20:53:34 Yes, fixed
27 // Phone: CCFIELD ACFIELD - PHONE - SUFFIX (Ext: EXT)?
Ilya Sherman 2011/02/16 09:33:22 I'm trying to understand the comment structure you
GeorgeY 2011/02/16 20:53:34 typo - fixed.
28 { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
29 { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
30 { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
31 { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
32 { PhoneField::REGEX_SEPARATOR, 0, 0 },
33 // Phone: CCFIELD:3 ACFIELD:3 PHONE:3 SUFFIX:4 (- EXT)?
34 { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 3 },
35 { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 3 },
36 { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
37 { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
38 { PhoneField::REGEX_SEPARATOR, 0, 0 },
39 // Area Code: ACFIELD Phone: PHONE (- SUFFIX (- EXT)?)?
40 { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
41 { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
42 { PhoneField::REGEX_SEPARATOR, 0, 0 },
43 // Phone: ACFIELD PHONE:3 SUFFIX:4 (- EXT)?
44 { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
45 { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 3 },
46 { PhoneField::REGEX_PHONE, PhoneField::FIELD_SUFFIX, 4 },
47 { PhoneField::REGEX_SEPARATOR, 0, 0 },
48 // Phone: CCFIELD \( ACFIELD \) PHONE (- SUFFIX (- EXT)?)?
49 { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
50 { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
51 { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
52 { PhoneField::REGEX_SEPARATOR, 0, 0 },
53 // Phone: \( ACFIELD \) PHONE (- SUFFIX (- EXT)?)?
54 { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
55 { PhoneField::REGEX_AREA_NOTEXT, PhoneField::FIELD_AREA_CODE, 0 },
56 { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
57 { PhoneField::REGEX_SEPARATOR, 0, 0 },
58 // Phone: CCFIELD - ACFIELD - PHONE - SUFFIX (Ext: EXT)?
59 { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
60 { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
61 { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
62 { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 0 },
63 { PhoneField::REGEX_SEPARATOR, 0, 0 },
64 // Phone: ACFIELD Prefix: PHONE Suffix: SUFFIX (Ext: EXT)?
65 { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
66 { PhoneField::REGEX_PREFIX, PhoneField::FIELD_PHONE, 0 },
67 { PhoneField::REGEX_SUFFIX, PhoneField::FIELD_SUFFIX, 0 },
68 { PhoneField::REGEX_SEPARATOR, 0, 0 },
69 // Phone: ACFIELD - PHONE - SUFFIX (Ext: EXT)?
70 { PhoneField::REGEX_PHONE, PhoneField::FIELD_AREA_CODE, 0 },
71 { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_PHONE, 3 },
72 { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_SUFFIX, 4 },
73 { PhoneField::REGEX_SEPARATOR, 0, 0 },
74 // Phone: CCFIELD - ACFIELD - PHONE (Ext: EXT)?
75 { PhoneField::REGEX_PHONE, PhoneField::FIELD_COUNTRY_CODE, 0 },
76 { PhoneField::REGEX_PREFIX_SEPARATOR, PhoneField::FIELD_AREA_CODE, 0 },
77 { PhoneField::REGEX_SUFFIX_SEPARATOR, PhoneField::FIELD_PHONE, 0 },
78 { PhoneField::REGEX_SEPARATOR, 0, 0 },
79 // Phone: ACFIELD - PHONE (- SUFFIX (- EXT)?)?
80 { PhoneField::REGEX_AREA, PhoneField::FIELD_AREA_CODE, 0 },
81 { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
82 { PhoneField::REGEX_SEPARATOR, 0, 0 },
83 // Phone: PHONE (- SUFFIX (- EXT)?)?
84 { PhoneField::REGEX_PHONE, PhoneField::FIELD_PHONE, 0 },
85 { PhoneField::REGEX_SEPARATOR, 0, 0 },
86 };
Ilya Sherman 2011/02/16 09:33:22 I'm finding this list pretty hard to grok. Do you
GeorgeY 2011/02/16 20:53:34 Probably would be difficult to generate them progr
87
18 // static 88 // static
19 PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter, 89 PhoneField* PhoneField::Parse(std::vector<AutoFillField*>::const_iterator* iter,
20 bool is_ecml) { 90 bool is_ecml) {
21 DCHECK(iter); 91 DCHECK(iter);
22 if (!iter) 92 if (!iter)
23 return NULL; 93 return NULL;
24 94
25 if (is_ecml) 95 if (is_ecml)
26 return ParseECML(iter); 96 return ParseECML(iter);
27 97
(...skipping 12 matching lines...) Expand all
40 } 110 }
41 111
42 // static 112 // static
43 PhoneField* PhoneField::ParseECML( 113 PhoneField* PhoneField::ParseECML(
44 std::vector<AutoFillField*>::const_iterator* iter) { 114 std::vector<AutoFillField*>::const_iterator* iter) {
45 string16 pattern(GetEcmlPattern(kEcmlShipToPhone, kEcmlBillToPhone, '|')); 115 string16 pattern(GetEcmlPattern(kEcmlShipToPhone, kEcmlBillToPhone, '|'));
46 116
47 AutoFillField* field; 117 AutoFillField* field;
48 if (ParseText(iter, pattern, &field)) { 118 if (ParseText(iter, pattern, &field)) {
49 PhoneField* phone_field = new PhoneField(); 119 PhoneField* phone_field = new PhoneField();
50 phone_field->phone_ = field; 120 phone_field->parsed_phone_fields_[FIELD_PHONE] = field;
51 return phone_field; 121 return phone_field;
52 } 122 }
53 123
54 return NULL; 124 return NULL;
55 } 125 }
56 126
57 bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const { 127 bool PhoneField::GetFieldInfo(FieldTypeMap* field_type_map) const {
58 bool ok; 128 bool ok = false;
59 129
60 if (area_code_ != NULL) { 130 DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was incorrectly parsed.
Ilya Sherman 2011/02/16 09:33:22 nit: I think you mean "was _correctly_ parsed"?
GeorgeY 2011/02/16 20:53:34 I written when DCHECK hits :). As we should encomp
61 ok = Add(field_type_map, area_code_, 131
62 AutoFillType(number_->GetCityCodeType())); 132 if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) ||
133 (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) ||
134 (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) {
135 if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
136 ok = Add(field_type_map,
137 parsed_phone_fields_[FIELD_COUNTRY_CODE],
138 AutoFillType(number_->GetCountryCodeType()));
139 DCHECK(ok);
140 }
141 if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) {
142 ok = Add(field_type_map,
143 parsed_phone_fields_[FIELD_AREA_CODE],
144 AutoFillType(number_->GetCityCodeType()));
145 DCHECK(ok);
146 }
147 // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
148 // we fill only the prefix depending on the size of the input field.
149 ok = Add(field_type_map,
150 parsed_phone_fields_[FIELD_PHONE],
151 AutoFillType(number_->GetNumberType()));
63 DCHECK(ok); 152 DCHECK(ok);
64 153 // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
65 if (prefix_ != NULL) { 154 // we fill only the suffix depending on the size of the input field.
66 // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form 155 if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) {
67 // we fill only the prefix depending on the size of the input field. 156 ok = Add(field_type_map,
68 ok = ok && Add(field_type_map, 157 parsed_phone_fields_[FIELD_SUFFIX],
69 prefix_, 158 AutoFillType(number_->GetNumberType()));
70 AutoFillType(number_->GetNumberType()));
71 DCHECK(ok);
72 // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
73 // we fill only the suffix depending on the size of the input field.
74 ok = ok && Add(field_type_map,
75 phone_,
76 AutoFillType(number_->GetNumberType()));
77 DCHECK(ok);
78 } else {
79 ok = ok && Add(field_type_map,
80 phone_,
81 AutoFillType(number_->GetNumberType()));
82 DCHECK(ok); 159 DCHECK(ok);
83 } 160 }
84 } else { 161 } else {
85 ok = Add(field_type_map, 162 ok = Add(field_type_map,
86 phone_, 163 parsed_phone_fields_[FIELD_PHONE],
87 AutoFillType(number_->GetWholeNumberType())); 164 AutoFillType(number_->GetWholeNumberType()));
88 DCHECK(ok); 165 DCHECK(ok);
89 } 166 }
90 167
91 return ok; 168 return ok;
92 } 169 }
93 170
94 PhoneField::PhoneField() 171 PhoneField::PhoneField() {
95 : phone_(NULL), 172 memset(parsed_phone_fields_, 0, sizeof(AutoFillField*) * FIELD_MAX);
dhollowa 2011/02/16 00:25:37 How about |sizeof(parsed_phone_fields_)| instead?
GeorgeY 2011/02/16 20:53:34 :) Done. The vector is not there because: 1. The s
Ilya Sherman 2011/02/16 23:22:08 Please remind me: Who owns the pointers? Do we cl
96 area_code_(NULL),
97 prefix_(NULL),
98 extension_(NULL) {
99 SetPhoneType(HOME_PHONE); 173 SetPhoneType(HOME_PHONE);
100 } 174 }
101 175
176 string16 PhoneField::GetCountryRegex() const {
177 // This one is the same for Home and Fax numbers.
178 return l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_CODE_RE);
179 }
180
102 string16 PhoneField::GetAreaRegex() const { 181 string16 PhoneField::GetAreaRegex() const {
103 // This one is the same for Home and Fax numbers. 182 // This one is the same for Home and Fax numbers.
104 return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE); 183 string16 area_code = l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_RE);
184 area_code.append(ASCIIToUTF16("|")); // Regexp separator.
185 area_code.append(GetAreaNoTextRegex());
186 return area_code;
187 }
188
189 string16 PhoneField::GetAreaNoTextRegex() const {
190 // This one is the same for Home and Fax numbers.
191 return l10n_util::GetStringUTF16(IDS_AUTOFILL_AREA_CODE_NOTEXT_RE);
105 } 192 }
106 193
107 string16 PhoneField::GetPhoneRegex() const { 194 string16 PhoneField::GetPhoneRegex() const {
108 if (phone_type_ == HOME_PHONE) 195 if (phone_type_ == HOME_PHONE)
109 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_RE); 196 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_RE);
110 else if (phone_type_ == FAX_PHONE) 197 else if (phone_type_ == FAX_PHONE)
111 return l10n_util::GetStringUTF16(IDS_AUTOFILL_FAX_RE); 198 return l10n_util::GetStringUTF16(IDS_AUTOFILL_FAX_RE);
112 else 199 else
113 NOTREACHED(); 200 NOTREACHED();
114 return string16(); 201 return string16();
115 } 202 }
116 203
204 string16 PhoneField::GetPrefixSeparatorRegex() const {
205 // This one is the same for Home and Fax numbers.
206 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_SEPARATOR_RE);
207 }
208
117 string16 PhoneField::GetPrefixRegex() const { 209 string16 PhoneField::GetPrefixRegex() const {
118 // This one is the same for Home and Fax numbers. 210 // This one is the same for Home and Fax numbers.
119 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE); 211 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_PREFIX_RE);
120 } 212 }
121 213
214 string16 PhoneField::GetSuffixSeparatorRegex() const {
215 // This one is the same for Home and Fax numbers.
216 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_SEPARATOR_RE);
217 }
218
122 string16 PhoneField::GetSuffixRegex() const { 219 string16 PhoneField::GetSuffixRegex() const {
123 // This one is the same for Home and Fax numbers. 220 // This one is the same for Home and Fax numbers.
124 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE); 221 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_SUFFIX_RE);
125 } 222 }
126 223
127 string16 PhoneField::GetExtensionRegex() const { 224 string16 PhoneField::GetExtensionRegex() const {
128 // This one is the same for Home and Fax numbers. 225 // This one is the same for Home and Fax numbers.
129 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE); 226 return l10n_util::GetStringUTF16(IDS_AUTOFILL_PHONE_EXTENSION_RE);
130 } 227 }
131 228
229 string16 PhoneField::GetRegExp(REGEX_MATCH regex_id) const {
230 switch (regex_id) {
231 case REGEX_COUNTRY: return GetCountryRegex();
232 case REGEX_AREA: return GetAreaRegex();
233 case REGEX_AREA_NOTEXT: return GetAreaNoTextRegex();
234 case REGEX_PHONE: return GetPhoneRegex();
235 case REGEX_PREFIX_SEPARATOR: return GetPrefixSeparatorRegex();
236 case REGEX_PREFIX: return GetPrefixRegex();
237 case REGEX_SUFFIX_SEPARATOR: return GetSuffixSeparatorRegex();
238 case REGEX_SUFFIX: return GetSuffixRegex();
239 case REGEX_EXTENSION: return GetExtensionRegex();
240 default:
241 NOTREACHED();
242 break;
243 }
244 return string16();
245 }
246
132 // static 247 // static
133 bool PhoneField::ParseInternal( 248 bool PhoneField::ParseInternal(
134 PhoneField *phone_field, 249 PhoneField *phone_field,
135 std::vector<AutoFillField*>::const_iterator* iter, 250 std::vector<AutoFillField*>::const_iterator* iter,
136 bool regular_phone) { 251 bool regular_phone) {
137 DCHECK(iter); 252 DCHECK(iter);
138 253
139 DCHECK(phone_field); 254 DCHECK(phone_field);
140 if (!phone_field) 255 if (!phone_field)
141 return false; 256 return false;
142 257
143 std::vector<AutoFillField*>::const_iterator q = *iter; 258 std::vector<AutoFillField*>::const_iterator q = *iter;
259
144 // The form owns the following variables, so they should not be deleted. 260 // The form owns the following variables, so they should not be deleted.
145 AutoFillField* phone = NULL; 261 AutoFillField* parsed_fields[FIELD_MAX];
146 AutoFillField* phone2 = NULL;
147 AutoFillField* phone3 = NULL;
148 bool area_code = false; // true if we've parsed an area code field.
149 262
150 // Some pages, such as BloomingdalesShipping.html, have a field labeled 263 for (size_t i = 0; i < arraysize(phone_field_grammars_); ++i) {
151 // "Area Code and Phone"; we want to parse this as a phone number field so 264 memset(parsed_fields, 0, sizeof(AutoFillField*) * FIELD_MAX);
dhollowa 2011/02/16 00:25:37 sizeof(parsed_phone_fields_)
GeorgeY 2011/02/16 20:53:34 Done.
152 // we look for "phone" before we look for "area code". 265 q = *iter;
153 if (ParseText(&q, phone_field->GetPhoneRegex(), &phone)) { 266 // Attempt to parse next possible match.
154 area_code = false; 267 for (; i < arraysize(phone_field_grammars_) &&
155 // Check the case when the match is for non-home phone and area code, e.g. 268 phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) {
156 // first field is a "Fax area code" and the subsequent is "Fax phone". 269 if (!ParseText(&q, phone_field->GetRegExp(phone_field_grammars_[i].regex),
157 if (!regular_phone) { 270 &parsed_fields[phone_field_grammars_[i].field_id]))
158 // Attempt parsing of the same field as an area code and then phone: 271 break;
159 std::vector<AutoFillField*>::const_iterator temp_it = *iter; 272 if (phone_field_grammars_[i].max_size &&
160 AutoFillField* tmp_phone1 = NULL; 273 (!parsed_fields[phone_field_grammars_[i].field_id]->max_length() ||
161 AutoFillField* tmp_phone2 = NULL; 274 phone_field_grammars_[i].max_size <
dhollowa 2011/02/16 00:25:37 nit: indentation looks wrong here.
GeorgeY 2011/02/16 20:53:34 Indented additional 2 spaces.
162 if (ParseText(&temp_it, phone_field->GetAreaRegex(), &tmp_phone1) && 275 parsed_fields[phone_field_grammars_[i].field_id]->max_length())) {
163 ParseText(&temp_it, phone_field->GetPhoneRegex(), &tmp_phone2)) { 276 break;
164 phone = tmp_phone1;
165 phone2 = tmp_phone2;
166 q = temp_it;
167 area_code = true;
168 } 277 }
169 } 278 }
170 } else { 279 if (i >= arraysize(phone_field_grammars_))
171 if (!ParseText(&q, phone_field->GetAreaRegex(), &phone)) 280 return false; // Parsing failed.
172 return false; 281 if (phone_field_grammars_[i].regex == REGEX_SEPARATOR)
173 area_code = true; 282 break; // Parsing succeeded.
174 // If this is not a home phone and there was no specification before 283 do {
175 // the phone number actually starts (e.g. field 1 "Area code:", field 2 284 ++i;
176 // "Fax:"), we skip searching for prefix and suffix and bail out. 285 } while (phone_field_grammars_[i].regex != REGEX_SEPARATOR);
177 if (!ParseText(&q, phone_field->GetPhoneRegex(), &phone2) && !regular_phone)
178 return false;
179 } 286 }
287 if (!parsed_fields[FIELD_PHONE])
288 return false;
Ilya Sherman 2011/02/16 09:33:22 Seems like we could fill parsed_fields[FIELD_PHONE
GeorgeY 2011/02/16 20:53:34 Suffix is optional - it gets parsed iff (if and on
180 289
181 // Sometimes phone number fields are separated by "-" (e.g. test page 290 for (int i = 0; i < FIELD_MAX; ++i)
182 // Crate and Barrel Check Out.html). Also, area codes are sometimes 291 phone_field->parsed_phone_fields_[i] = parsed_fields[i];
183 // surrounded by parentheses, so a ")" may appear after the area code field. 292
184 // 293 // Look for optional fields.
185 // We used to match "tel" here, which we've seen in field names (e.g. on
186 // Newegg2.html), but that's too general: some pages (e.g.
187 // uk/Furniture123-1.html) have several phone numbers in succession and we
188 // don't want those to be parsed as components of a single phone number.
189 if (phone2 == NULL)
190 ParseText(&q, phone_field->GetPrefixRegex(), &phone2);
191 294
192 // Look for a third text box. 295 // Look for a third text box.
193 if (phone2) 296 if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
194 ParseText(&q, phone_field->GetSuffixRegex(), &phone3); 297 if (!ParseText(&q, phone_field->GetSuffixRegex(),
195 298 &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
196 // Now we have one, two, or three phone number text fields. Package them 299 ParseText(&q, phone_field->GetSuffixSeparatorRegex(),
197 // up into a PhoneField object. 300 &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
198
199 if (phone2 == NULL) { // only one field
200 if (area_code) {
201 // It's an area code - it doesn't make sense.
202 return false;
203 }
204 phone_field->phone_ = phone;
205 } else {
206 phone_field->area_code_ = phone;
207 if (phone3 == NULL) { // two fields
208 phone_field->phone_ = phone2;
209 } else { // three boxes: area code, prefix and suffix
210 phone_field->prefix_ = phone2;
211 phone_field->phone_ = phone3;
212 } 301 }
213 } 302 }
214 303
215 // Now look for an extension. 304 // Now look for an extension.
216 ParseText(&q, phone_field->GetExtensionRegex(), &phone_field->extension_); 305 ParseText(&q, phone_field->GetExtensionRegex(),
306 &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
217 307
218 *iter = q; 308 *iter = q;
219 return true; 309 return true;
220 } 310 }
221 311
222 void PhoneField::SetPhoneType(PHONE_TYPE phone_type) { 312 void PhoneField::SetPhoneType(PHONE_TYPE phone_type) {
223 // Field types are different as well, so we create a temporary phone number, 313 // Field types are different as well, so we create a temporary phone number,
224 // to get relevant field types. 314 // to get relevant field types.
225 if (phone_type == HOME_PHONE) 315 if (phone_type == HOME_PHONE)
226 number_.reset(new HomePhoneNumber); 316 number_.reset(new HomePhoneNumber);
227 else 317 else
228 number_.reset(new FaxNumber); 318 number_.reset(new FaxNumber);
229 phone_type_ = phone_type; 319 phone_type_ = phone_type;
230 } 320 }
231 321
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698