Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: chrome/browser/autofill/name_field.cc

Issue 7891020: Make autofill regular expressions unicode again. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: remove extra gyp change Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/autofill/email_field.cc ('k') | chrome/browser/autofill/phone_field.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autofill/name_field.h" 5 #include "chrome/browser/autofill/name_field.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h" 8 #include "base/memory/scoped_ptr.h"
9 #include "base/string_util.h" 9 #include "base/string_util.h"
10 #include "base/utf_string_conversions.h" 10 #include "base/utf_string_conversions.h"
11 #include "chrome/browser/autofill/autofill_regex_constants.h"
11 #include "chrome/browser/autofill/autofill_scanner.h" 12 #include "chrome/browser/autofill/autofill_scanner.h"
12 #include "chrome/browser/autofill/autofill_type.h" 13 #include "chrome/browser/autofill/autofill_type.h"
13 #include "ui/base/l10n/l10n_util.h" 14 #include "ui/base/l10n/l10n_util.h"
14 15
15 namespace { 16 namespace {
16 17
17 // The UTF-8 versions of these regular expressions are in
18 // regular_expressions.txt.
19 const char kNameIgnoredRe[] =
20 "user.?name|user.?id|nickname|maiden name|title|prefix|suffix"
21 // de-DE
22 "|vollst\xc3\xa4ndiger.?name"
23 // zh-CN
24 "|\xe7\x94\xa8\xe6\x88\xb7\xe5\x90\x8d"
25 // ko-KR
26 "|(\xec\x82\xac\xec\x9a\xa9\xec\x9e\x90.?)?\xec\x95\x84\xec\x9d\xb4\xeb"
27 "\x94\x94|\xec\x82\xac\xec\x9a\xa9\xec\x9e\x90.?ID";
28 const char kNameRe[] =
29 "^name|full.?name|your.?name|customer.?name|firstandlastname|bill.?name"
30 "|ship.?name"
31 // es
32 "|nombre.*y.*apellidos"
33 // fr-FR
34 "|^nom"
35 // ja-JP
36 "|\xe3\x81\x8a\xe5\x90\x8d\xe5\x89\x8d|\xe6\xb0\x8f\xe5\x90\x8d"
37 // pt-BR, pt-PT
38 "|^nome"
39 // zh-CN
40 "|\xe5\xa7\x93\xe5\x90\x8d"
41 // ko-KR
42 "|\xec\x84\xb1\xeb\xaa\x85";
43 const char kNameSpecificRe[] =
44 "^name"
45 // fr-FR
46 "|^nom"
47 // pt-BR, pt-PT
48 "|^nome";
49 const char kFirstNameRe[] =
50 "first.*name|initials|fname|first$"
51 // de-DE
52 "|vorname"
53 // es
54 "|nombre"
55 // fr-FR
56 "|forename|pr\xc3\xa9nom|prenom"
57 // ja-JP
58 "|\xe5\x90\x8d"
59 // pt-BR, pt-PT
60 "|nome"
61 // ru
62 "|\xd0\x98\xd0\xbc\xd1\x8f"
63 // ko-KR
64 "|\xec\x9d\xb4\xeb\xa6\x84";
65 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b";
66 const char kMiddleNameRe[] =
67 "middle.*name|mname|middle$"
68 // es
69 "|apellido.?materno|lastlastname";
70 const char kLastNameRe[] =
71 "last.*name|lname|surname|last$|secondname"
72 // de-DE
73 "|nachname"
74 // es
75 "|apellido"
76 // fr-FR
77 "|famille|^nom"
78 // it-IT
79 "|cognome"
80 // ja-JP
81 "|\xe5\xa7\x93"
82 // pt-BR, pt-PT
83 "|morada|apelidos|surename|sobrenome"
84 // ru
85 "|\xd0\xa4\xd0\xb0\xd0\xbc\xd0\xb8\xd0\xbb\xd0\xb8\xd1\x8f"
86 // ko-KR
87 "|\xec\x84\xb1[^\xeb\xaa\x85]?";
88
89 // A form field that can parse a full name field. 18 // A form field that can parse a full name field.
90 class FullNameField : public NameField { 19 class FullNameField : public NameField {
91 public: 20 public:
92 static FullNameField* Parse(AutofillScanner* scanner); 21 static FullNameField* Parse(AutofillScanner* scanner);
93 22
94 protected: 23 protected:
95 // FormField: 24 // FormField:
96 virtual bool ClassifyField(FieldTypeMap* map) const OVERRIDE; 25 virtual bool ClassifyField(FieldTypeMap* map) const OVERRIDE;
97 26
98 private: 27 private:
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
139 } 68 }
140 69
141 // This is overridden in concrete subclasses. 70 // This is overridden in concrete subclasses.
142 bool NameField::ClassifyField(FieldTypeMap* map) const { 71 bool NameField::ClassifyField(FieldTypeMap* map) const {
143 return false; 72 return false;
144 } 73 }
145 74
146 FullNameField* FullNameField::Parse(AutofillScanner* scanner) { 75 FullNameField* FullNameField::Parse(AutofillScanner* scanner) {
147 // Exclude e.g. "username" or "nickname" fields. 76 // Exclude e.g. "username" or "nickname" fields.
148 scanner->SaveCursor(); 77 scanner->SaveCursor();
149 bool should_ignore = ParseField(scanner, UTF8ToUTF16(kNameIgnoredRe), NULL); 78 bool should_ignore = ParseField(scanner,
79 UTF8ToUTF16(autofill::kNameIgnoredRe), NULL);
150 scanner->Rewind(); 80 scanner->Rewind();
151 if (should_ignore) 81 if (should_ignore)
152 return NULL; 82 return NULL;
153 83
154 // Searching for any label containing the word "name" is too general; 84 // Searching for any label containing the word "name" is too general;
155 // for example, Travelocity_Edit travel profile.html contains a field 85 // for example, Travelocity_Edit travel profile.html contains a field
156 // "Travel Profile Name". 86 // "Travel Profile Name".
157 const AutofillField* field = NULL; 87 const AutofillField* field = NULL;
158 if (ParseField(scanner, UTF8ToUTF16(kNameRe), &field)) 88 if (ParseField(scanner, UTF8ToUTF16(autofill::kNameRe), &field))
159 return new FullNameField(field); 89 return new FullNameField(field);
160 90
161 return NULL; 91 return NULL;
162 } 92 }
163 93
164 bool FullNameField::ClassifyField(FieldTypeMap* map) const { 94 bool FullNameField::ClassifyField(FieldTypeMap* map) const {
165 return AddClassification(field_, NAME_FULL, map); 95 return AddClassification(field_, NAME_FULL, map);
166 } 96 }
167 97
168 FullNameField::FullNameField(const AutofillField* field) 98 FullNameField::FullNameField(const AutofillField* field)
169 : field_(field) { 99 : field_(field) {
170 } 100 }
171 101
172 FirstLastNameField* FirstLastNameField::ParseSpecificName( 102 FirstLastNameField* FirstLastNameField::ParseSpecificName(
173 AutofillScanner* scanner) { 103 AutofillScanner* scanner) {
174 // Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html) 104 // Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html)
175 // have the label "Name" followed by two or three text fields. 105 // have the label "Name" followed by two or three text fields.
176 scoped_ptr<FirstLastNameField> v(new FirstLastNameField); 106 scoped_ptr<FirstLastNameField> v(new FirstLastNameField);
177 scanner->SaveCursor(); 107 scanner->SaveCursor();
178 108
179 const AutofillField* next; 109 const AutofillField* next;
180 if (ParseField(scanner, 110 if (ParseField(scanner,
181 UTF8ToUTF16(kNameSpecificRe), &v->first_name_) && 111 UTF8ToUTF16(autofill::kNameSpecificRe), &v->first_name_) &&
182 ParseEmptyLabel(scanner, &next)) { 112 ParseEmptyLabel(scanner, &next)) {
183 if (ParseEmptyLabel(scanner, &v->last_name_)) { 113 if (ParseEmptyLabel(scanner, &v->last_name_)) {
184 // There are three name fields; assume that the middle one is a 114 // There are three name fields; assume that the middle one is a
185 // middle initial (it is, at least, on SmithsonianCheckout.html). 115 // middle initial (it is, at least, on SmithsonianCheckout.html).
186 v->middle_name_ = next; 116 v->middle_name_ = next;
187 v->middle_initial_ = true; 117 v->middle_initial_ = true;
188 } else { // only two name fields 118 } else { // only two name fields
189 v->last_name_ = next; 119 v->last_name_ = next;
190 } 120 }
191 121
(...skipping 15 matching lines...) Expand all
207 // dell_checkout1.html). At least one UK page (The China Shop2.html) 137 // dell_checkout1.html). At least one UK page (The China Shop2.html)
208 // asks, in stuffy English style, for just initials and a surname, 138 // asks, in stuffy English style, for just initials and a surname,
209 // so we match "initials" here (and just fill in a first name there, 139 // so we match "initials" here (and just fill in a first name there,
210 // American-style). 140 // American-style).
211 // The ".*first$" matches fields ending in "first" (example in sample8.html). 141 // The ".*first$" matches fields ending in "first" (example in sample8.html).
212 // The ".*last$" matches fields ending in "last" (example in sample8.html). 142 // The ".*last$" matches fields ending in "last" (example in sample8.html).
213 143
214 // Allow name fields to appear in any order. 144 // Allow name fields to appear in any order.
215 while (!scanner->IsEnd()) { 145 while (!scanner->IsEnd()) {
216 // Skip over any unrelated fields, e.g. "username" or "nickname". 146 // Skip over any unrelated fields, e.g. "username" or "nickname".
217 if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kNameIgnoredRe), 147 if (ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kNameIgnoredRe),
218 MATCH_DEFAULT | MATCH_SELECT, NULL)) { 148 MATCH_DEFAULT | MATCH_SELECT, NULL)) {
219 continue; 149 continue;
220 } 150 }
221 151
222 if (!v->first_name_ && 152 if (!v->first_name_ &&
223 ParseField(scanner, UTF8ToUTF16(kFirstNameRe), &v->first_name_)) { 153 ParseField(scanner, UTF8ToUTF16(autofill::kFirstNameRe),
154 &v->first_name_)) {
224 continue; 155 continue;
225 } 156 }
226 157
227 // We check for a middle initial before checking for a middle name 158 // We check for a middle initial before checking for a middle name
228 // because at least one page (PC Connection.html) has a field marked 159 // because at least one page (PC Connection.html) has a field marked
229 // as both (the label text is "MI" and the element name is 160 // as both (the label text is "MI" and the element name is
230 // "txtmiddlename"); such a field probably actually represents a 161 // "txtmiddlename"); such a field probably actually represents a
231 // middle initial. 162 // middle initial.
232 if (!v->middle_name_ && 163 if (!v->middle_name_ &&
233 ParseField(scanner, UTF8ToUTF16(kMiddleInitialRe), &v->middle_name_)) { 164 ParseField(scanner, UTF8ToUTF16(autofill::kMiddleInitialRe),
165 &v->middle_name_)) {
234 v->middle_initial_ = true; 166 v->middle_initial_ = true;
235 continue; 167 continue;
236 } 168 }
237 169
238 if (!v->middle_name_ && 170 if (!v->middle_name_ &&
239 ParseField(scanner, UTF8ToUTF16(kMiddleNameRe), &v->middle_name_)) { 171 ParseField(scanner, UTF8ToUTF16(autofill::kMiddleNameRe),
172 &v->middle_name_)) {
240 continue; 173 continue;
241 } 174 }
242 175
243 if (!v->last_name_ && 176 if (!v->last_name_ &&
244 ParseField(scanner, UTF8ToUTF16(kLastNameRe), &v->last_name_)) { 177 ParseField(scanner, UTF8ToUTF16(autofill::kLastNameRe),
178 &v->last_name_)) {
245 continue; 179 continue;
246 } 180 }
247 181
248 break; 182 break;
249 } 183 }
250 184
251 // Consider the match to be successful if we detected both first and last name 185 // Consider the match to be successful if we detected both first and last name
252 // fields. 186 // fields.
253 if (v->first_name_ && v->last_name_) 187 if (v->first_name_ && v->last_name_)
254 return v.release(); 188 return v.release();
(...skipping 16 matching lines...) Expand all
271 middle_initial_(false) { 205 middle_initial_(false) {
272 } 206 }
273 207
274 bool FirstLastNameField::ClassifyField(FieldTypeMap* map) const { 208 bool FirstLastNameField::ClassifyField(FieldTypeMap* map) const {
275 bool ok = AddClassification(first_name_, NAME_FIRST, map); 209 bool ok = AddClassification(first_name_, NAME_FIRST, map);
276 ok = ok && AddClassification(last_name_, NAME_LAST, map); 210 ok = ok && AddClassification(last_name_, NAME_LAST, map);
277 AutofillFieldType type = middle_initial_ ? NAME_MIDDLE_INITIAL : NAME_MIDDLE; 211 AutofillFieldType type = middle_initial_ ? NAME_MIDDLE_INITIAL : NAME_MIDDLE;
278 ok = ok && AddClassification(middle_name_, type, map); 212 ok = ok && AddClassification(middle_name_, type, map);
279 return ok; 213 return ok;
280 } 214 }
OLDNEW
« no previous file with comments | « chrome/browser/autofill/email_field.cc ('k') | chrome/browser/autofill/phone_field.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698