Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(245)

Side by Side Diff: components/autofill/content/renderer/form_classifier.cc

Issue 1883183002: [Password Manager] HTML parsing based client-side form type classifier (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Changes addressed to reviewer comments Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/content/renderer/form_classifier.h"
6
7 #include <algorithm>
8
9 #include "base/strings/string_util.h"
10 #include "components/autofill/content/renderer/form_autofill_util.h"
11 #include "third_party/WebKit/public/platform/WebString.h"
12 #include "third_party/WebKit/public/platform/WebVector.h"
13 #include "third_party/WebKit/public/web/WebFormControlElement.h"
14 #include "third_party/WebKit/public/web/WebInputElement.h"
15 #include "third_party/re2/src/re2/re2.h"
16
17 using autofill::form_util::WebFormControlElementToFormField;
18 using blink::WebFormControlElement;
19 using blink::WebInputElement;
20 using blink::WebString;
21 using blink::WebVector;
22
23 namespace autofill {
24
25 namespace {
26
27 // The words that frequently appear in attribute values of signin forms.
28 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"};
29 const int kNumberOfSigninFeatures = arraysize(kSigninTextFeatures);
30
31 // The words that frequently appear in attribute values of signup forms.
32 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"};
33 const int kNumberOfSignupFeatures = arraysize(kSignupTextFeatures);
34
35 // The words that frequently appear in attribute values of captcha elements.
36 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"};
37 const int kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures);
38
39 // The characters that should be removed from attribute values.
40 const char kCharactersToBeRemoved[] = "-|_";
41
42 // Minimal number of input fields to detect signup/change password form.
43 const size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2;
44 const size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2;
45 const size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3;
46 const size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2;
47
48 // Find |features| in |element|'s attribute values. Returns true if at least one
49 // text feature was found.
50 bool FindTextFeaturesForClass(const blink::WebElement& element,
51 const char* const features[],
52 size_t number_of_features) {
53 DCHECK(features);
54
55 for (unsigned i = 0; i < element.attributeCount(); ++i) {
56 std::string filtered_value =
57 base::ToLowerASCII(element.attributeValue(i).utf8());
58 RE2::GlobalReplace(&filtered_value, kCharactersToBeRemoved, "");
vabr (Chromium) 2016/06/10 13:22:12 I'm afraid both regexps and the substring replacem
dvadym 2016/06/10 14:12:09 I wouldn't mind returning to erase-remove, but I'd
vabr (Chromium) 2016/06/10 14:32:14 Parsing is indeed fast, but building/compiling the
dvadym 2016/06/10 15:05:16 Sure regexp parsing is slow, that exactly what I m
kolos1 2016/06/13 14:27:34 Replaced with erase/remove_if solution.
59
60 if (filtered_value.empty())
61 continue;
62 for (size_t j = 0; j < number_of_features; j++) {
63 if (filtered_value.find(features[j]) != std::string::npos)
64 return true;
65 }
66 }
67 return false;
68 }
69
70 // Returns true if at least one captcha feature was found in |element|'s
71 // attribute values.
72 bool IsCaptchaInput(const blink::WebInputElement& element) {
73 return FindTextFeaturesForClass(element, kCaptchaFeatures,
74 kNumberOfCaptchaFeatures);
75 }
76
77 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha
78 // text features. Returns true, if at least one occurrence was found.
79 bool FindCaptchaInImgElements(const blink::WebElement& form,
80 bool ingnore_invisible) {
81 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img"));
82
83 blink::WebElementCollection img_elements =
84 form.getElementsByHTMLTagName(kImageTag);
85 for (blink::WebElement element = img_elements.firstItem(); !element.isNull();
86 element = img_elements.nextItem()) {
87 if (ingnore_invisible && !form_util::IsWebNodeVisible(element))
88 continue;
89 if (FindTextFeaturesForClass(element, kCaptchaFeatures,
90 kNumberOfCaptchaFeatures))
91 return true;
92 }
93 return false;
94 }
95
96 // Finds signin and signup features in |element|'s attribute values. Sets to
97 // true |found_signin_text_features| or |found_signup_text_features| if
98 // appropriate features were found.
99 void FindTextFeaturesInElement(const blink::WebElement& element,
100 bool* found_signin_text_features,
101 bool* found_signup_text_features) {
102 DCHECK(found_signin_text_features);
103 DCHECK(found_signup_text_features);
104
105 if (!found_signin_text_features) {
106 *found_signin_text_features = FindTextFeaturesForClass(
107 element, kSigninTextFeatures, kNumberOfSigninFeatures);
108 }
109 if (!found_signup_text_features) {
110 *found_signup_text_features = FindTextFeaturesForClass(
111 element, kSignupTextFeatures, kNumberOfSignupFeatures);
112 }
113 }
114
115 // Returns true if |element| has type "button" or "image".
116 bool IsButtonOrImageElement(const WebFormControlElement& element) {
117 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button"));
118 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image"));
119
120 return element.formControlType() == kButton ||
121 element.formControlType() == kImage;
122 }
123
124 // Returns true if |element| has type "submit".
125 bool IsSubmitElement(const WebFormControlElement& element) {
126 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit"));
127
128 return element.formControlType() == kSubmit;
129 }
130
131 // Returns true if |element| has type "hidden";
132 bool IsHiddenElement(const WebFormControlElement& element) {
133 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
134
135 return element.formControlType() == kHidden;
136 }
137
138 // Returns true if |element| has type "select-multiple" or "select-one".
139 bool IsSelectElement(const WebFormControlElement& element) {
140 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
141 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple"));
142
143 return element.formControlType() == kSelectOne ||
144 element.formControlType() == kSelectMultiple;
145 }
146
147 // Return true if |form| contains at least one visible password element.
148 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) {
149 WebVector<WebFormControlElement> control_elements;
150 form.getFormControlElements(control_elements);
151 for (auto& control_element : control_elements) {
152 const WebInputElement* input_element = toWebInputElement(&control_element);
153 if (!input_element)
154 continue;
155 if (input_element->isPasswordField() &&
156 form_util::IsWebNodeVisible(*input_element))
157 return true;
158 }
159 return false;
160 }
161
162 // Finds text features in <form> tag of |form| and its ancestors.
163 // Sets |found_signin_text_features| and |found_signup_text_features| to true,
164 // if corresponding features are found.
165 void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form,
166 bool* found_signin_text_features,
167 bool* found_signup_text_features) {
168 CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input"));
169
170 DCHECK(found_signin_text_features);
171 DCHECK(found_signup_text_features);
172
173 unsigned number_of_inputs = form.getElementsByHTMLTagName(kInput).length();
174 blink::WebNode parent = form;
175 for (; !parent.isNull();) {
176 if (parent.getElementsByHTMLTagName(kInput).length() > number_of_inputs)
177 break;
178 if (parent.isElementNode()) {
179 blink::WebElement element = parent.toConst<blink::WebElement>();
180 bool has_signin_feature = FindTextFeaturesForClass(
181 element, kSigninTextFeatures, kNumberOfSigninFeatures);
182 bool has_signup_feature = FindTextFeaturesForClass(
183 element, kSignupTextFeatures, kNumberOfSignupFeatures);
184 if (has_signin_feature && has_signup_feature)
185 break;
186 if (has_signin_feature) {
187 *found_signin_text_features = true;
188 break;
189 }
190 if (has_signup_feature) {
191 *found_signup_text_features = true;
192 break;
193 }
194 }
195 parent = parent.parentNode();
196 }
197 }
198
199 } // namespace
200
201 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form,
202 base::string16* generation_field) {
203 DCHECK(generation_field);
204
205 if (form.isNull())
206 return false;
207
208 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form);
209
210 bool found_signin_text_features = false;
211 bool found_signup_text_features = false;
212 size_t number_of_text_input_fields = 0;
213 size_t number_of_password_input_fields = 0;
214 size_t number_of_checkbox_input_fields = 0;
215 size_t number_of_other_input_fields = 0;
216 bool found_captcha =
217 FindCaptchaInImgElements(form, ignore_invisible_elements);
218
219 FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features,
220 &found_signup_text_features);
221
222 std::vector<WebInputElement> passwords;
223 WebVector<WebFormControlElement> control_elements;
224 form.getFormControlElements(control_elements);
225
226 for (size_t i = 0; i < control_elements.size(); ++i) {
227 WebFormControlElement control_element = control_elements[i];
228 bool element_is_invisible = !form_util::IsWebNodeVisible(control_element);
229 if ((element_is_invisible && ignore_invisible_elements) ||
230 IsHiddenElement(control_element))
231 continue;
232
233 // If type="button" or "image", skip them, because it might be a link
234 // to another form.
235 if (IsButtonOrImageElement(control_element))
236 continue;
237
238 FindTextFeaturesInElement(control_element, &found_signin_text_features,
239 &found_signup_text_features);
240
241 // Since <select> is not WebInputElement, but WebSelectElement, process
242 // them as a special case.
243 if (IsSelectElement(control_element)) {
244 number_of_other_input_fields++;
245 } else {
246 WebInputElement* input_element = toWebInputElement(&control_element);
247 if (!input_element)
248 continue;
249
250 if (input_element->isTextField()) {
251 if (input_element->isPasswordField()) {
252 number_of_password_input_fields++;
253 passwords.push_back(*input_element);
254 } else {
255 number_of_text_input_fields++;
256 found_captcha = found_captcha || IsCaptchaInput(*input_element);
257 }
258 } else { // Non-text fields.
259 if (input_element->isCheckbox())
260 number_of_checkbox_input_fields++;
261 else if (!IsSubmitElement(*input_element))
262 number_of_other_input_fields++;
263 }
264 }
265 }
266
267 if (number_of_password_input_fields == 0 ||
268 number_of_password_input_fields > 3)
269 return false;
270
271 if ((number_of_text_input_fields - found_captcha >=
272 MINIMAL_NUMBER_OF_TEXT_FIELDS ||
273 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS ||
274 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS ||
275 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) ||
276 (found_signup_text_features && !found_signin_text_features)) {
277 WebInputElement password_creation_field;
278
279 // TODO(crbug.com/618309): Improve local classifier to distinguish password
280 // creation and password usage fields on the change password forms.
281 if (passwords.size() == 3)
282 password_creation_field = passwords[1];
283 else
284 password_creation_field = passwords[0];
285
286 *generation_field = password_creation_field.nameForAutofill();
287 return true;
288 }
289 return false;
290 }
291 }
OLDNEW
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698