Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(705)

Side by Side Diff: chrome/browser/autofill/address_field.cc

Issue 7531023: Improve Autofill heuristics when detecting labels from previous elements. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autofill/address_field.h" 5 #include "chrome/browser/autofill/address_field.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
11 #include "base/string16.h" 11 #include "base/string16.h"
12 #include "base/string_util.h" 12 #include "base/string_util.h"
13 #include "base/utf_string_conversions.h" 13 #include "base/utf_string_conversions.h"
14 #include "chrome/browser/autofill/autofill_ecml.h" 14 #include "chrome/browser/autofill/autofill_ecml.h"
15 #include "chrome/browser/autofill/autofill_field.h" 15 #include "chrome/browser/autofill/autofill_field.h"
16 #include "chrome/browser/autofill/autofill_scanner.h" 16 #include "chrome/browser/autofill/autofill_scanner.h"
17 #include "grit/autofill_resources.h" 17 #include "grit/autofill_resources.h"
18 #include "ui/base/l10n/l10n_util.h" 18 #include "ui/base/l10n/l10n_util.h"
19 19
20 using autofill::GetEcmlPattern; 20 using autofill::GetEcmlPattern;
21 21
22 FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { 22 FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) {
23 if (scanner->IsEnd()) 23 if (scanner->IsEnd())
24 return NULL; 24 return NULL;
25 25
26 scoped_ptr<AddressField> address_field(new AddressField); 26 scoped_ptr<AddressField> address_field(new AddressField);
27 const AutofillField* initial_field = scanner->Cursor(); 27 const AutofillField* const initial_field = scanner->Cursor();
28 scanner->SaveCursor(); 28 size_t saved_cursor = scanner->SaveCursor();
29 29
30 string16 attention_ignored = 30 string16 attention_ignored =
31 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); 31 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE);
32 string16 region_ignored = 32 string16 region_ignored =
33 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); 33 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE);
34 34
35 // Allow address fields to appear in any order. 35 // Allow address fields to appear in any order.
36 size_t begin_trailing_non_labeled_fields = 0;
37 bool has_trailing_non_labeled_fields = false;
36 while (!scanner->IsEnd()) { 38 while (!scanner->IsEnd()) {
39 const size_t cursor = scanner->SaveCursor();
37 if (ParseAddressLines(scanner, is_ecml, address_field.get()) || 40 if (ParseAddressLines(scanner, is_ecml, address_field.get()) ||
38 ParseCity(scanner, is_ecml, address_field.get()) || 41 ParseCity(scanner, is_ecml, address_field.get()) ||
39 ParseState(scanner, is_ecml, address_field.get()) || 42 ParseState(scanner, is_ecml, address_field.get()) ||
40 ParseZipCode(scanner, is_ecml, address_field.get()) || 43 ParseZipCode(scanner, is_ecml, address_field.get()) ||
41 ParseCountry(scanner, is_ecml, address_field.get()) || 44 ParseCountry(scanner, is_ecml, address_field.get()) ||
42 ParseCompany(scanner, is_ecml, address_field.get())) { 45 ParseCompany(scanner, is_ecml, address_field.get())) {
46 has_trailing_non_labeled_fields = false;
43 continue; 47 continue;
44 } else if (ParseField(scanner, attention_ignored, NULL) || 48 } else if (ParseField(scanner, attention_ignored, NULL) ||
45 ParseField(scanner, region_ignored, NULL)) { 49 ParseField(scanner, region_ignored, NULL)) {
46 // We ignore the following: 50 // We ignore the following:
47 // * Attention. 51 // * Attention.
48 // * Province/Region/Other. 52 // * Province/Region/Other.
49 continue; 53 continue;
50 } else if (scanner->Cursor() != initial_field && 54 } else if (scanner->Cursor() != initial_field &&
51 ParseEmptyLabel(scanner, NULL)) { 55 ParseEmptyLabel(scanner, NULL)) {
52 // Ignore non-labeled fields within an address; the page 56 // Ignore non-labeled fields within an address; the page
53 // MapQuest Driving Directions North America.html contains such a field. 57 // MapQuest Driving Directions North America.html contains such a field.
54 // We only ignore such fields after we've parsed at least one other field; 58 // We only ignore such fields after we've parsed at least one other field;
55 // otherwise we'd effectively parse address fields before other field 59 // otherwise we'd effectively parse address fields before other field
56 // types after any non-labeled fields, and we want email address fields to 60 // types after any non-labeled fields, and we want email address fields to
57 // have precedence since some pages contain fields labeled 61 // have precedence since some pages contain fields labeled
58 // "Email address". 62 // "Email address".
63 if (!has_trailing_non_labeled_fields) {
64 has_trailing_non_labeled_fields = true;
65 begin_trailing_non_labeled_fields = cursor;
66 }
67
59 continue; 68 continue;
60 } else { 69 } else {
61 // No field found. 70 // No field found.
62 break; 71 break;
63 } 72 }
64 } 73 }
65 74
66 // If we have identified any address fields in this field then it should be 75 // If we have identified any address fields in this field then it should be
67 // added to the list of fields. 76 // added to the list of fields.
68 if (address_field->company_ != NULL || 77 if (address_field->company_ != NULL ||
69 address_field->address1_ != NULL || address_field->address2_ != NULL || 78 address_field->address1_ != NULL || address_field->address2_ != NULL ||
70 address_field->city_ != NULL || address_field->state_ != NULL || 79 address_field->city_ != NULL || address_field->state_ != NULL ||
71 address_field->zip_ != NULL || address_field->zip4_ || 80 address_field->zip_ != NULL || address_field->zip4_ ||
72 address_field->country_ != NULL) { 81 address_field->country_ != NULL) {
82 // Don't slurp non-labeled fields at the end into the address.
83 if (has_trailing_non_labeled_fields)
84 scanner->RewindTo(begin_trailing_non_labeled_fields);
85
73 address_field->type_ = address_field->FindType(); 86 address_field->type_ = address_field->FindType();
74 return address_field.release(); 87 return address_field.release();
75 } 88 }
76 89
77 scanner->Rewind(); 90 scanner->RewindTo(saved_cursor);
78 return NULL; 91 return NULL;
79 } 92 }
80 93
81 AddressType AddressField::FindType() const { 94 AddressType AddressField::FindType() const {
82 // First look at the field name, which itself will sometimes contain 95 // First look at the field name, which itself will sometimes contain
83 // "bill" or "ship". We could check for the ECML type prefixes 96 // "bill" or "ship". We could check for the ECML type prefixes
84 // here, but there's no need to since ECML's prefixes Ecom_BillTo 97 // here, but there's no need to since ECML's prefixes Ecom_BillTo
85 // and Ecom_ShipTo contain "bill" and "ship" anyway. 98 // and Ecom_ShipTo contain "bill" and "ship" anyway.
86 if (company_) { 99 if (company_) {
87 string16 name = StringToLowerASCII(company_->name); 100 string16 name = StringToLowerASCII(company_->name);
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 return kBillingAddress; 402 return kBillingAddress;
390 403
391 if (bill == string16::npos && ship != string16::npos) 404 if (bill == string16::npos && ship != string16::npos)
392 return kShippingAddress; 405 return kShippingAddress;
393 406
394 if (bill > ship) 407 if (bill > ship)
395 return kBillingAddress; 408 return kBillingAddress;
396 409
397 return kShippingAddress; 410 return kShippingAddress;
398 } 411 }
OLD | NEW
« no previous file with comments | « no previous file | chrome/browser/autofill/autofill_resources.grd » ('j') | chrome/renderer/autofill/form_manager.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698