OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autofill/name_field.h" | 5 #include "chrome/browser/autofill/name_field.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/memory/scoped_ptr.h" | 8 #include "base/memory/scoped_ptr.h" |
9 #include "base/string_util.h" | 9 #include "base/string_util.h" |
10 #include "base/utf_string_conversions.h" | 10 #include "base/utf_string_conversions.h" |
| 11 #include "chrome/browser/autofill/autofill_regex_constants.h" |
11 #include "chrome/browser/autofill/autofill_scanner.h" | 12 #include "chrome/browser/autofill/autofill_scanner.h" |
12 #include "chrome/browser/autofill/autofill_type.h" | 13 #include "chrome/browser/autofill/autofill_type.h" |
13 #include "ui/base/l10n/l10n_util.h" | 14 #include "ui/base/l10n/l10n_util.h" |
14 | 15 |
15 namespace { | 16 namespace { |
16 | 17 |
17 // The UTF-8 version of these regular expressions are in | |
18 // regular_expressions.txt. | |
19 const char kNameIgnoredRe[] = | |
20 "user.?name|user.?id|nickname|maiden name|title|prefix|suffix" | |
21 // de-DE | |
22 "|vollst\xc3\xa4ndiger.?name" | |
23 // zh-CN | |
24 "|\xe7\x94\xa8\xe6\x88\xb7\xe5\x90\x8d" | |
25 // ko-KR | |
26 "|(\xec\x82\xac\xec\x9a\xa9\xec\x9e\x90.?)?\xec\x95\x84\xec\x9d\xb4\xeb" | |
27 "\x94\x94|\xec\x82\xac\xec\x9a\xa9\xec\x9e\x90.?ID"; | |
28 const char kNameRe[] = | |
29 "^name|full.?name|your.?name|customer.?name|firstandlastname|bill.?name" | |
30 "|ship.?name" | |
31 // es | |
32 "|nombre.*y.*apellidos" | |
33 // fr-FR | |
34 "|^nom" | |
35 // ja-JP | |
36 "|\xe3\x81\x8a\xe5\x90\x8d\xe5\x89\x8d|\xe6\xb0\x8f\xe5\x90\x8d" | |
37 // pt-BR, pt-PT | |
38 "|^nome" | |
39 // zh-CN | |
40 "|\xe5\xa7\x93\xe5\x90\x8d" | |
41 // ko-KR | |
42 "|\xec\x84\xb1\xeb\xaa\x85"; | |
43 const char kNameSpecificRe[] = | |
44 "^name" | |
45 // fr-FR | |
46 "|^nom" | |
47 // pt-BR, pt-PT | |
48 "|^nome"; | |
49 const char kFirstNameRe[] = | |
50 "first.*name|initials|fname|first$" | |
51 // de-DE | |
52 "|vorname" | |
53 // es | |
54 "|nombre" | |
55 // fr-FR | |
56 "|forename|pr\xc3\xa9nom|prenom" | |
57 // ja-JP | |
58 "|\xe5\x90\x8d" | |
59 // pt-BR, pt-PT | |
60 "|nome" | |
61 // ru | |
62 "|\xd0\x98\xd0\xbc\xd1\x8f" | |
63 // ko-KR | |
64 "|\xec\x9d\xb4\xeb\xa6\x84"; | |
65 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b"; | |
66 const char kMiddleNameRe[] = | |
67 "middle.*name|mname|middle$" | |
68 // es | |
69 "|apellido.?materno|lastlastname"; | |
70 const char kLastNameRe[] = | |
71 "last.*name|lname|surname|last$|secondname" | |
72 // de-DE | |
73 "|nachname" | |
74 // es | |
75 "|apellido" | |
76 // fr-FR | |
77 "|famille|^nom" | |
78 // it-IT | |
79 "|cognome" | |
80 // ja-JP | |
81 "|\xe5\xa7\x93" | |
82 // pt-BR, pt-PT | |
83 "|morada|apelidos|surename|sobrenome" | |
84 // ru | |
85 "|\xd0\xa4\xd0\xb0\xd0\xbc\xd0\xb8\xd0\xbb\xd0\xb8\xd1\x8f" | |
86 // ko-KR | |
87 "|\xec\x84\xb1[^\xeb\xaa\x85]?"; | |
88 | |
89 // A form field that can parse a full name field. | 18 // A form field that can parse a full name field. |
90 class FullNameField : public NameField { | 19 class FullNameField : public NameField { |
91 public: | 20 public: |
92 static FullNameField* Parse(AutofillScanner* scanner); | 21 static FullNameField* Parse(AutofillScanner* scanner); |
93 | 22 |
94 protected: | 23 protected: |
95 // FormField: | 24 // FormField: |
96 virtual bool ClassifyField(FieldTypeMap* map) const OVERRIDE; | 25 virtual bool ClassifyField(FieldTypeMap* map) const OVERRIDE; |
97 | 26 |
98 private: | 27 private: |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
139 } | 68 } |
140 | 69 |
141 // This is overriden in concrete subclasses. | 70 // This is overriden in concrete subclasses. |
142 bool NameField::ClassifyField(FieldTypeMap* map) const { | 71 bool NameField::ClassifyField(FieldTypeMap* map) const { |
143 return false; | 72 return false; |
144 } | 73 } |
145 | 74 |
146 FullNameField* FullNameField::Parse(AutofillScanner* scanner) { | 75 FullNameField* FullNameField::Parse(AutofillScanner* scanner) { |
147 // Exclude e.g. "username" or "nickname" fields. | 76 // Exclude e.g. "username" or "nickname" fields. |
148 scanner->SaveCursor(); | 77 scanner->SaveCursor(); |
149 bool should_ignore = ParseField(scanner, UTF8ToUTF16(kNameIgnoredRe), NULL); | 78 bool should_ignore = ParseField(scanner, |
| 79 UTF8ToUTF16(autofill::kNameIgnoredRe), NULL); |
150 scanner->Rewind(); | 80 scanner->Rewind(); |
151 if (should_ignore) | 81 if (should_ignore) |
152 return NULL; | 82 return NULL; |
153 | 83 |
154 // Searching for any label containing the word "name" is too general; | 84 // Searching for any label containing the word "name" is too general; |
155 // for example, Travelocity_Edit travel profile.html contains a field | 85 // for example, Travelocity_Edit travel profile.html contains a field |
156 // "Travel Profile Name". | 86 // "Travel Profile Name". |
157 const AutofillField* field = NULL; | 87 const AutofillField* field = NULL; |
158 if (ParseField(scanner, UTF8ToUTF16(kNameRe), &field)) | 88 if (ParseField(scanner, UTF8ToUTF16(autofill::kNameRe), &field)) |
159 return new FullNameField(field); | 89 return new FullNameField(field); |
160 | 90 |
161 return NULL; | 91 return NULL; |
162 } | 92 } |
163 | 93 |
164 bool FullNameField::ClassifyField(FieldTypeMap* map) const { | 94 bool FullNameField::ClassifyField(FieldTypeMap* map) const { |
165 return AddClassification(field_, NAME_FULL, map); | 95 return AddClassification(field_, NAME_FULL, map); |
166 } | 96 } |
167 | 97 |
168 FullNameField::FullNameField(const AutofillField* field) | 98 FullNameField::FullNameField(const AutofillField* field) |
169 : field_(field) { | 99 : field_(field) { |
170 } | 100 } |
171 | 101 |
172 FirstLastNameField* FirstLastNameField::ParseSpecificName( | 102 FirstLastNameField* FirstLastNameField::ParseSpecificName( |
173 AutofillScanner* scanner) { | 103 AutofillScanner* scanner) { |
174 // Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html) | 104 // Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html) |
175 // have the label "Name" followed by two or three text fields. | 105 // have the label "Name" followed by two or three text fields. |
176 scoped_ptr<FirstLastNameField> v(new FirstLastNameField); | 106 scoped_ptr<FirstLastNameField> v(new FirstLastNameField); |
177 scanner->SaveCursor(); | 107 scanner->SaveCursor(); |
178 | 108 |
179 const AutofillField* next; | 109 const AutofillField* next; |
180 if (ParseField(scanner, | 110 if (ParseField(scanner, |
181 UTF8ToUTF16(kNameSpecificRe), &v->first_name_) && | 111 UTF8ToUTF16(autofill::kNameSpecificRe), &v->first_name_) && |
182 ParseEmptyLabel(scanner, &next)) { | 112 ParseEmptyLabel(scanner, &next)) { |
183 if (ParseEmptyLabel(scanner, &v->last_name_)) { | 113 if (ParseEmptyLabel(scanner, &v->last_name_)) { |
184 // There are three name fields; assume that the middle one is a | 114 // There are three name fields; assume that the middle one is a |
185 // middle initial (it is, at least, on SmithsonianCheckout.html). | 115 // middle initial (it is, at least, on SmithsonianCheckout.html). |
186 v->middle_name_ = next; | 116 v->middle_name_ = next; |
187 v->middle_initial_ = true; | 117 v->middle_initial_ = true; |
188 } else { // only two name fields | 118 } else { // only two name fields |
189 v->last_name_ = next; | 119 v->last_name_ = next; |
190 } | 120 } |
191 | 121 |
(...skipping 15 matching lines...) Expand all Loading... |
207 // dell_checkout1.html). At least one UK page (The China Shop2.html) | 137 // dell_checkout1.html). At least one UK page (The China Shop2.html) |
208 // asks, in stuffy English style, for just initials and a surname, | 138 // asks, in stuffy English style, for just initials and a surname, |
209 // so we match "initials" here (and just fill in a first name there, | 139 // so we match "initials" here (and just fill in a first name there, |
210 // American-style). | 140 // American-style). |
211 // The ".*first$" matches fields ending in "first" (example in sample8.html). | 141 // The ".*first$" matches fields ending in "first" (example in sample8.html). |
212 // The ".*last$" matches fields ending in "last" (example in sample8.html). | 142 // The ".*last$" matches fields ending in "last" (example in sample8.html). |
213 | 143 |
214 // Allow name fields to appear in any order. | 144 // Allow name fields to appear in any order. |
215 while (!scanner->IsEnd()) { | 145 while (!scanner->IsEnd()) { |
216 // Skip over any unrelated fields, e.g. "username" or "nickname". | 146 // Skip over any unrelated fields, e.g. "username" or "nickname". |
217 if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kNameIgnoredRe), | 147 if (ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kNameIgnoredRe), |
218 MATCH_DEFAULT | MATCH_SELECT, NULL)) { | 148 MATCH_DEFAULT | MATCH_SELECT, NULL)) { |
219 continue; | 149 continue; |
220 } | 150 } |
221 | 151 |
222 if (!v->first_name_ && | 152 if (!v->first_name_ && |
223 ParseField(scanner, UTF8ToUTF16(kFirstNameRe), &v->first_name_)) { | 153 ParseField(scanner, UTF8ToUTF16(autofill::kFirstNameRe), |
| 154 &v->first_name_)) { |
224 continue; | 155 continue; |
225 } | 156 } |
226 | 157 |
227 // We check for a middle initial before checking for a middle name | 158 // We check for a middle initial before checking for a middle name |
228 // because at least one page (PC Connection.html) has a field marked | 159 // because at least one page (PC Connection.html) has a field marked |
229 // as both (the label text is "MI" and the element name is | 160 // as both (the label text is "MI" and the element name is |
230 // "txtmiddlename"); such a field probably actually represents a | 161 // "txtmiddlename"); such a field probably actually represents a |
231 // middle initial. | 162 // middle initial. |
232 if (!v->middle_name_ && | 163 if (!v->middle_name_ && |
233 ParseField(scanner, UTF8ToUTF16(kMiddleInitialRe), &v->middle_name_)) { | 164 ParseField(scanner, UTF8ToUTF16(autofill::kMiddleInitialRe), |
| 165 &v->middle_name_)) { |
234 v->middle_initial_ = true; | 166 v->middle_initial_ = true; |
235 continue; | 167 continue; |
236 } | 168 } |
237 | 169 |
238 if (!v->middle_name_ && | 170 if (!v->middle_name_ && |
239 ParseField(scanner, UTF8ToUTF16(kMiddleNameRe), &v->middle_name_)) { | 171 ParseField(scanner, UTF8ToUTF16(autofill::kMiddleNameRe), |
| 172 &v->middle_name_)) { |
240 continue; | 173 continue; |
241 } | 174 } |
242 | 175 |
243 if (!v->last_name_ && | 176 if (!v->last_name_ && |
244 ParseField(scanner, UTF8ToUTF16(kLastNameRe), &v->last_name_)) { | 177 ParseField(scanner, UTF8ToUTF16(autofill::kLastNameRe), |
| 178 &v->last_name_)) { |
245 continue; | 179 continue; |
246 } | 180 } |
247 | 181 |
248 break; | 182 break; |
249 } | 183 } |
250 | 184 |
251 // Consider the match to be successful if we detected both first and last name | 185 // Consider the match to be successful if we detected both first and last name |
252 // fields. | 186 // fields. |
253 if (v->first_name_ && v->last_name_) | 187 if (v->first_name_ && v->last_name_) |
254 return v.release(); | 188 return v.release(); |
(...skipping 16 matching lines...) Expand all Loading... |
271 middle_initial_(false) { | 205 middle_initial_(false) { |
272 } | 206 } |
273 | 207 |
274 bool FirstLastNameField::ClassifyField(FieldTypeMap* map) const { | 208 bool FirstLastNameField::ClassifyField(FieldTypeMap* map) const { |
275 bool ok = AddClassification(first_name_, NAME_FIRST, map); | 209 bool ok = AddClassification(first_name_, NAME_FIRST, map); |
276 ok = ok && AddClassification(last_name_, NAME_LAST, map); | 210 ok = ok && AddClassification(last_name_, NAME_LAST, map); |
277 AutofillFieldType type = middle_initial_ ? NAME_MIDDLE_INITIAL : NAME_MIDDLE; | 211 AutofillFieldType type = middle_initial_ ? NAME_MIDDLE_INITIAL : NAME_MIDDLE; |
278 ok = ok && AddClassification(middle_name_, type, map); | 212 ok = ok && AddClassification(middle_name_, type, map); |
279 return ok; | 213 return ok; |
280 } | 214 } |
OLD | NEW |