Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: chrome/browser/autofill/address_field.cc

Issue 6026010: Autofill heuristics regular expressions should be stored in external data files. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Nits (created 10 years ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | chrome/browser/autofill/autofill_resources.grd » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autofill/address_field.h" 5 #include "chrome/browser/autofill/address_field.h"
6 6
7 #include "app/l10n_util.h"
7 #include "base/logging.h" 8 #include "base/logging.h"
8 #include "base/scoped_ptr.h" 9 #include "base/scoped_ptr.h"
9 #include "base/string16.h" 10 #include "base/string16.h"
10 #include "base/string_util.h" 11 #include "base/string_util.h"
11 #include "base/utf_string_conversions.h" 12 #include "base/utf_string_conversions.h"
12 #include "chrome/browser/autofill/autofill_field.h" 13 #include "chrome/browser/autofill/autofill_field.h"
14 #include "grit/autofill_resources.h"
13 15
14 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { 16 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const {
15 AutoFillFieldType address_company; 17 AutoFillFieldType address_company;
16 AutoFillFieldType address_line1; 18 AutoFillFieldType address_line1;
17 AutoFillFieldType address_line2; 19 AutoFillFieldType address_line2;
18 AutoFillFieldType address_appt_num; 20 AutoFillFieldType address_appt_num;
19 AutoFillFieldType address_city; 21 AutoFillFieldType address_city;
20 AutoFillFieldType address_state; 22 AutoFillFieldType address_state;
21 AutoFillFieldType address_zip; 23 AutoFillFieldType address_zip;
22 AutoFillFieldType address_country; 24 AutoFillFieldType address_country;
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
83 85
84 scoped_ptr<AddressField> address_field(new AddressField); 86 scoped_ptr<AddressField> address_field(new AddressField);
85 std::vector<AutoFillField*>::const_iterator q = *iter; 87 std::vector<AutoFillField*>::const_iterator q = *iter;
86 string16 pattern; 88 string16 pattern;
87 89
88 // The ECML standard uses 2 letter country codes. So we will 90 // The ECML standard uses 2 letter country codes. So we will
89 // have to remember that this is an ECML form, for when we fill 91 // have to remember that this is an ECML form, for when we fill
90 // it out. 92 // it out.
91 address_field->is_ecml_ = is_ecml; 93 address_field->is_ecml_ = is_ecml;
92 94
95 string16 attention_ignored =
96 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE);
97 string16 region_ignored =
98 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE);
99
93 // Allow address fields to appear in any order. 100 // Allow address fields to appear in any order.
94 while (true) { 101 while (true) {
95 if (ParseCompany(&q, is_ecml, address_field.get()) || 102 if (ParseCompany(&q, is_ecml, address_field.get()) ||
96 ParseAddressLines(&q, is_ecml, address_field.get()) || 103 ParseAddressLines(&q, is_ecml, address_field.get()) ||
97 ParseCity(&q, is_ecml, address_field.get()) || 104 ParseCity(&q, is_ecml, address_field.get()) ||
98 ParseState(&q, is_ecml, address_field.get()) || 105 ParseState(&q, is_ecml, address_field.get()) ||
99 ParseZipCode(&q, is_ecml, address_field.get()) || 106 ParseZipCode(&q, is_ecml, address_field.get()) ||
100 ParseCountry(&q, is_ecml, address_field.get())) { 107 ParseCountry(&q, is_ecml, address_field.get())) {
101 continue; 108 continue;
102 } else if (ParseText(&q, ASCIIToUTF16("attention|attn.")) || 109 } else if (ParseText(&q, attention_ignored) ||
103 ParseText(&q, ASCIIToUTF16("province|region|other"))) { 110 ParseText(&q, region_ignored)) {
104 // We ignore the following: 111 // We ignore the following:
105 // * Attention. 112 // * Attention.
106 // * Province/Region/Other. 113 // * Province/Region/Other.
107 continue; 114 continue;
108 } else if (*q != **iter && ParseEmpty(&q)) { 115 } else if (*q != **iter && ParseEmpty(&q)) {
109 // Ignore non-labeled fields within an address; the page 116 // Ignore non-labeled fields within an address; the page
110 // MapQuest Driving Directions North America.html contains such a field. 117 // MapQuest Driving Directions North America.html contains such a field.
111 // We only ignore such fields after we've parsed at least one other field; 118 // We only ignore such fields after we've parsed at least one other field;
112 // otherwise we'd effectively parse address fields before other field 119 // otherwise we'd effectively parse address fields before other field
113 // types after any non-labeled fields, and we want email address fields to 120 // types after any non-labeled fields, and we want email address fields to
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
166 std::vector<AutoFillField*>::const_iterator* iter, 173 std::vector<AutoFillField*>::const_iterator* iter,
167 bool is_ecml, AddressField* address_field) { 174 bool is_ecml, AddressField* address_field) {
168 if (address_field->company_ && !address_field->company_->IsEmpty()) 175 if (address_field->company_ && !address_field->company_->IsEmpty())
169 return false; 176 return false;
170 177
171 string16 pattern; 178 string16 pattern;
172 if (is_ecml) 179 if (is_ecml)
173 pattern = GetEcmlPattern(kEcmlShipToCompanyName, 180 pattern = GetEcmlPattern(kEcmlShipToCompanyName,
174 kEcmlBillToCompanyName, '|'); 181 kEcmlBillToCompanyName, '|');
175 else 182 else
176 pattern = ASCIIToUTF16("company|business name"); 183 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE);
177 184
178 if (!ParseText(iter, pattern, &address_field->company_)) 185 if (!ParseText(iter, pattern, &address_field->company_))
179 return false; 186 return false;
180 187
181 return true; 188 return true;
182 } 189 }
183 190
184 // static 191 // static
185 bool AddressField::ParseAddressLines( 192 bool AddressField::ParseAddressLines(
186 std::vector<AutoFillField*>::const_iterator* iter, 193 std::vector<AutoFillField*>::const_iterator* iter,
187 bool is_ecml, AddressField* address_field) { 194 bool is_ecml, AddressField* address_field) {
188 // We only match the string "address" in page text, not in element names, 195 // We only match the string "address" in page text, not in element names,
189 // because sometimes every element in a group of address fields will have 196 // because sometimes every element in a group of address fields will have
190 // a name containing the string "address"; for example, on the page 197 // a name containing the string "address"; for example, on the page
191 // Kohl's - Register Billing Address.html the text element labeled "city" 198 // Kohl's - Register Billing Address.html the text element labeled "city"
192 // has the name "BILL_TO_ADDRESS<>city". We do match address labels 199 // has the name "BILL_TO_ADDRESS<>city". We do match address labels
193 // such as "address1", which appear as element names on various pages (eg 200 // such as "address1", which appear as element names on various pages (eg
194 // AmericanGirl-Registration.html, BloomingdalesBilling.html, 201 // AmericanGirl-Registration.html, BloomingdalesBilling.html,
195 // EBay Registration Enter Information.html). 202 // EBay Registration Enter Information.html).
196 if (address_field->address1_) 203 if (address_field->address1_)
197 return false; 204 return false;
198 205
199 string16 pattern; 206 string16 pattern;
200 if (is_ecml) { 207 if (is_ecml) {
201 pattern = GetEcmlPattern(kEcmlShipToAddress1, 208 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|');
202 kEcmlBillToAddress1, '|');
203 if (!ParseText(iter, pattern, &address_field->address1_)) 209 if (!ParseText(iter, pattern, &address_field->address1_))
204 return false; 210 return false;
205 } else { 211 } else {
206 pattern = 212 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE);
207 ASCIIToUTF16("address.?line|address1|addr1|street"); 213 string16 label_pattern =
208 string16 label_pattern = ASCIIToUTF16("address"); 214 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
209 215
210 if (!ParseText(iter, pattern, &address_field->address1_)) 216 if (!ParseText(iter, pattern, &address_field->address1_))
211 if (!ParseLabelText(iter, label_pattern, &address_field->address1_)) 217 if (!ParseLabelText(iter, label_pattern, &address_field->address1_))
212 return false; 218 return false;
213 } 219 }
214 220
215 // Optionally parse more address lines, which may have empty labels. 221 // Optionally parse more address lines, which may have empty labels.
216 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) 222 // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
217 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! 223 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
218 if (is_ecml) { 224 if (is_ecml) {
219 pattern = GetEcmlPattern(kEcmlShipToAddress2, 225 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|');
220 kEcmlBillToAddress2, '|');
221 if (!ParseEmptyText(iter, &address_field->address2_)) 226 if (!ParseEmptyText(iter, &address_field->address2_))
222 ParseText(iter, pattern, &address_field->address2_); 227 ParseText(iter, pattern, &address_field->address2_);
223 } else { 228 } else {
224 pattern = ASCIIToUTF16("address.?line2|address2|addr2|street|suite|unit"); 229 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE);
225 string16 label_pattern = ASCIIToUTF16("address"); 230 string16 label_pattern =
231 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
226 if (!ParseEmptyText(iter, &address_field->address2_)) 232 if (!ParseEmptyText(iter, &address_field->address2_))
227 if (!ParseText(iter, pattern, &address_field->address2_)) 233 if (!ParseText(iter, pattern, &address_field->address2_))
228 ParseLabelText(iter, label_pattern, &address_field->address2_); 234 ParseLabelText(iter, label_pattern, &address_field->address2_);
229 } 235 }
230 236
231 // Try for a third line, which we will promptly discard. 237 // Try for a third line, which we will promptly discard.
232 if (address_field->address2_ != NULL) { 238 if (address_field->address2_ != NULL) {
233 if (is_ecml) { 239 if (is_ecml) {
234 pattern = GetEcmlPattern(kEcmlShipToAddress3, 240 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|');
235 kEcmlBillToAddress3, '|');
236 ParseText(iter, pattern); 241 ParseText(iter, pattern);
237 } else { 242 } else {
238 pattern = ASCIIToUTF16("address.?line3|address3|addr3|street|line3"); 243 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE);
239 if (!ParseEmptyText(iter, NULL)) 244 if (!ParseEmptyText(iter, NULL))
240 ParseText(iter, pattern, NULL); 245 ParseText(iter, pattern, NULL);
241 } 246 }
242 } 247 }
243 248
244 return true; 249 return true;
245 } 250 }
246 251
247 // static 252 // static
248 bool AddressField::ParseCountry( 253 bool AddressField::ParseCountry(
249 std::vector<AutoFillField*>::const_iterator* iter, 254 std::vector<AutoFillField*>::const_iterator* iter,
250 bool is_ecml, AddressField* address_field) { 255 bool is_ecml, AddressField* address_field) {
251 // Parse a country. The occasional page (e.g. 256 // Parse a country. The occasional page (e.g.
252 // Travelocity_New Member Information1.html) calls this a "location". 257 // Travelocity_New Member Information1.html) calls this a "location".
253 // Note: ECML standard uses 2 letter country code (ISO 3166) 258 // Note: ECML standard uses 2 letter country code (ISO 3166)
254 if (address_field->country_ && !address_field->country_->IsEmpty()) 259 if (address_field->country_ && !address_field->country_->IsEmpty())
255 return false; 260 return false;
256 261
257 string16 pattern; 262 string16 pattern;
258 if (is_ecml) 263 if (is_ecml)
259 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); 264 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|');
260 else 265 else
261 pattern = ASCIIToUTF16("country|location"); 266 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE);
262 267
263 if (!ParseText(iter, pattern, &address_field->country_)) 268 if (!ParseText(iter, pattern, &address_field->country_))
264 return false; 269 return false;
265 270
266 return true; 271 return true;
267 } 272 }
268 273
269 // static 274 // static
270 bool AddressField::ParseZipCode( 275 bool AddressField::ParseZipCode(
271 std::vector<AutoFillField*>::const_iterator* iter, 276 std::vector<AutoFillField*>::const_iterator* iter,
272 bool is_ecml, AddressField* address_field) { 277 bool is_ecml, AddressField* address_field) {
273 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this 278 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this
274 // is called a "post code". 279 // is called a "post code".
275 // 280 //
276 // HACK: Just for the MapQuest driving directions page we match the 281 // HACK: Just for the MapQuest driving directions page we match the
277 // exact name "1z", which MapQuest uses to label its zip code field. 282 // exact name "1z", which MapQuest uses to label its zip code field.
278 // Hopefully before long we'll be smart enough to find the zip code 283 // Hopefully before long we'll be smart enough to find the zip code
279 // on that page automatically. 284 // on that page automatically.
280 if (address_field->zip_) 285 if (address_field->zip_)
281 return false; 286 return false;
282 287
283 // We may be out of fields. 288 // We may be out of fields.
284 if (!**iter) 289 if (!**iter)
285 return false; 290 return false;
286 291
287 string16 pattern; 292 string16 pattern;
288 if (is_ecml) { 293 if (is_ecml) {
289 pattern = GetEcmlPattern(kEcmlShipToPostalCode, 294 pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|');
290 kEcmlBillToPostalCode, '|');
291 } else { 295 } else {
292 pattern = ASCIIToUTF16("zip|postal|post code|pcode|^1z$"); 296 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE);
293 } 297 }
294 298
295 AddressType tempType; 299 AddressType tempType;
296 string16 name = (**iter)->name(); 300 string16 name = (**iter)->name();
297 301
298 // Note: comparisons using the ecml compliant name as a prefix must be used in 302 // Note: comparisons using the ecml compliant name as a prefix must be used in
299 // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for 303 // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for
300 // more detail. 304 // more detail.
301 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); 305 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode));
302 if (StartsWith(name, bill_to_postal_code_field, false)) { 306 if (StartsWith(name, bill_to_postal_code_field, false)) {
303 tempType = kBillingAddress; 307 tempType = kBillingAddress;
304 } else if (StartsWith(name, bill_to_postal_code_field, false)) { 308 } else if (StartsWith(name, bill_to_postal_code_field, false)) {
305 tempType = kShippingAddress; 309 tempType = kShippingAddress;
306 } else { 310 } else {
307 tempType = kGenericAddress; 311 tempType = kGenericAddress;
308 } 312 }
309 313
310 if (!ParseText(iter, pattern, &address_field->zip_)) 314 if (!ParseText(iter, pattern, &address_field->zip_))
311 return false; 315 return false;
312 316
313 address_field->type_ = tempType; 317 address_field->type_ = tempType;
314 if (!is_ecml) { 318 if (!is_ecml) {
315 // Look for a zip+4, whose field name will also often contain 319 // Look for a zip+4, whose field name will also often contain
316 // the substring "zip". 320 // the substring "zip".
317 ParseText(iter, ASCIIToUTF16("zip|^-$"), &address_field->zip4_); 321 ParseText(iter,
322 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE),
323 &address_field->zip4_);
318 } 324 }
319 325
320 return true; 326 return true;
321 } 327 }
322 328
323 // static 329 // static
324 bool AddressField::ParseCity( 330 bool AddressField::ParseCity(
325 std::vector<AutoFillField*>::const_iterator* iter, 331 std::vector<AutoFillField*>::const_iterator* iter,
326 bool is_ecml, AddressField* address_field) { 332 bool is_ecml, AddressField* address_field) {
327 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use 333 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use
328 // the term "town". 334 // the term "town".
329 if (address_field->city_) 335 if (address_field->city_)
330 return false; 336 return false;
331 337
332 string16 pattern; 338 string16 pattern;
333 if (is_ecml) 339 if (is_ecml)
334 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); 340 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|');
335 else 341 else
336 pattern = ASCIIToUTF16("city|town"); 342 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE);
337 343
338 if (!ParseText(iter, pattern, &address_field->city_)) 344 if (!ParseText(iter, pattern, &address_field->city_))
339 return false; 345 return false;
340 346
341 return true; 347 return true;
342 } 348 }
343 349
344 // static 350 // static
345 bool AddressField::ParseState( 351 bool AddressField::ParseState(
346 std::vector<AutoFillField*>::const_iterator* iter, 352 std::vector<AutoFillField*>::const_iterator* iter,
347 bool is_ecml, AddressField* address_field) { 353 bool is_ecml, AddressField* address_field) {
348 if (address_field->state_) 354 if (address_field->state_)
349 return false; 355 return false;
350 356
351 string16 pattern; 357 string16 pattern;
352 if (is_ecml) 358 if (is_ecml)
353 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); 359 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|');
354 else 360 else
355 pattern = ASCIIToUTF16("state|county"); 361 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE);
356 362
357 if (!ParseText(iter, pattern, &address_field->state_)) 363 if (!ParseText(iter, pattern, &address_field->state_))
358 return false; 364 return false;
359 365
360 return true; 366 return true;
361 } 367 }
362 368
363 AddressType AddressField::AddressTypeFromText(const string16 &text) { 369 AddressType AddressField::AddressTypeFromText(const string16 &text) {
364 if (text.find(ASCIIToUTF16("same as")) != string16::npos || 370 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE))
365 text.find(ASCIIToUTF16("use my")) != string16::npos) 371 != string16::npos ||
372 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE))
373 != string16::npos)
366 // This text could be a checkbox label such as "same as my billing 374 // This text could be a checkbox label such as "same as my billing
367 // address" or "use my shipping address". 375 // address" or "use my shipping address".
368 // ++ It would help if we generally skipped all text that appears 376 // ++ It would help if we generally skipped all text that appears
369 // after a check box. 377 // after a check box.
370 return kGenericAddress; 378 return kGenericAddress;
371 379
372 // Not all pages say "billing address" and "shipping address" explicitly; 380 // Not all pages say "billing address" and "shipping address" explicitly;
373 // for example, Craft Catalog1.html has "Bill-to Address" and 381 // for example, Craft Catalog1.html has "Bill-to Address" and
374 // "Ship-to Address". 382 // "Ship-to Address".
375 size_t bill = text.rfind(ASCIIToUTF16("bill")); 383 size_t bill = text.rfind(
376 size_t ship = text.rfind(ASCIIToUTF16("ship")); 384 l10n_util::GetStringUTF16(IDS_AUTOFILL_BILLING_DESIGNATOR_RE));
385 size_t ship = text.rfind(
386 l10n_util::GetStringUTF16(IDS_AUTOFILL_SHIPPING_DESIGNATOR_RE));
377 387
378 if (bill == string16::npos && ship == string16::npos) 388 if (bill == string16::npos && ship == string16::npos)
379 return kGenericAddress; 389 return kGenericAddress;
380 390
381 if (bill != string16::npos && ship == string16::npos) 391 if (bill != string16::npos && ship == string16::npos)
382 return kBillingAddress; 392 return kBillingAddress;
383 393
384 if (bill == string16::npos && ship != string16::npos) 394 if (bill == string16::npos && ship != string16::npos)
385 return kShippingAddress; 395 return kShippingAddress;
386 396
387 if (bill > ship) 397 if (bill > ship)
388 return kBillingAddress; 398 return kBillingAddress;
389 399
390 return kShippingAddress; 400 return kShippingAddress;
391 } 401 }
OLD | NEW
« no previous file with comments | « no previous file | chrome/browser/autofill/autofill_resources.grd » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698