Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(527)

Side by Side Diff: components/autofill/content/renderer/form_classifier.cc

Issue 1883183002: [Password Manager] HTML parsing based client-side form type classifier (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Changes addressed to reviewers' comments 2 Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/content/renderer/form_classifier.h"
6
7 #include <algorithm>
8
9 #include "base/macros.h"
10 #include "base/strings/string16.h"
11 #include "base/strings/string_util.h"
12 #include "components/autofill/content/renderer/form_autofill_util.h"
13 #include "third_party/WebKit/public/platform/WebString.h"
14 #include "third_party/WebKit/public/platform/WebVector.h"
15 #include "third_party/WebKit/public/web/WebFormControlElement.h"
16 #include "third_party/WebKit/public/web/WebInputElement.h"
17 #include "third_party/re2/src/re2/re2.h"
18
19 using autofill::form_util::WebFormControlElementToFormField;
20 using blink::WebFormControlElement;
21 using blink::WebInputElement;
22 using blink::WebString;
23 using blink::WebVector;
24
25 namespace autofill {
26
27 namespace {
28
29 // The words that frequently appear in attribute values of signin forms.
30 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"};
31 constexpr size_t kNumberOfSigninFeatures = arraysize(kSigninTextFeatures);
32
33 // The words that frequently appear in attribute values of signup forms.
34 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"};
35 constexpr size_t kNumberOfSignupFeatures = arraysize(kSignupTextFeatures);
36
37 // The words that frequently appear in attribute values of captcha elements.
38 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"};
39 constexpr size_t kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures);
40
// Per-category field counts used to recognise signup and change password
// forms. Reaching or exceeding any single one of these thresholds is enough
// for the form to be treated as one where password generation should be
// available.

// Text (non-password) input fields.
constexpr size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2;
// Password input fields.
constexpr size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2;
// Checkbox input fields.
constexpr size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3;
// Any other counted fields (e.g. <select> elements).
constexpr size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2;
48
// Strips every '-' and '_' from |value| in place, so that spellings such as
// "sign-in" and "sign_in" both collapse to "signin".
void ClearAttributeValue(std::string* value) {
  const auto is_separator = [](char c) { return c == '-' || c == '_'; };

  // Compact the kept characters to the front, then drop the leftover tail.
  const auto kept_end =
      std::remove_if(value->begin(), value->end(), is_separator);
  value->erase(kept_end, value->end());
}
55
56 // Find |features| in |element|'s attribute values. Returns true if at least one
57 // text feature was found.
58 bool FindTextFeaturesForClass(const blink::WebElement& element,
59 const char* const features[],
60 size_t number_of_features) {
61 DCHECK(features);
62
63 for (unsigned i = 0; i < element.attributeCount(); ++i) {
64 std::string filtered_value =
65 base::ToLowerASCII(element.attributeValue(i).utf8());
66 ClearAttributeValue(&filtered_value);
67
68 if (filtered_value.empty())
69 continue;
70 for (size_t j = 0; j < number_of_features; ++j) {
71 if (filtered_value.find(features[j]) != std::string::npos)
72 return true;
73 }
74 }
75 return false;
76 }
77
78 // Returns true if at least one captcha feature was found in |element|'s
79 // attribute values.
80 bool IsCaptchaInput(const blink::WebInputElement& element) {
81 return FindTextFeaturesForClass(element, kCaptchaFeatures,
82 kNumberOfCaptchaFeatures);
83 }
84
85 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha
86 // text features. Returns true, if at least one occurrence was found.
87 bool FindCaptchaInImgElements(const blink::WebElement& form,
88 bool ingnore_invisible) {
89 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img"));
90
91 blink::WebElementCollection img_elements =
92 form.getElementsByHTMLTagName(kImageTag);
93 for (blink::WebElement element = img_elements.firstItem(); !element.isNull();
94 element = img_elements.nextItem()) {
95 if (ingnore_invisible && !form_util::IsWebNodeVisible(element))
96 continue;
97 if (FindTextFeaturesForClass(element, kCaptchaFeatures,
98 kNumberOfCaptchaFeatures)) {
99 return true;
100 }
101 }
102 return false;
103 }
104
105 // Finds signin and signup features in |element|'s attribute values. Sets to
106 // true |found_signin_text_features| or |found_signup_text_features| if
107 // appropriate features were found.
108 void FindTextFeaturesInElement(const blink::WebElement& element,
109 bool* found_signin_text_features,
110 bool* found_signup_text_features) {
111 DCHECK(found_signin_text_features);
112 DCHECK(found_signup_text_features);
113
114 if (!*found_signin_text_features) {
115 *found_signin_text_features = FindTextFeaturesForClass(
116 element, kSigninTextFeatures, kNumberOfSigninFeatures);
117 }
118 if (!*found_signup_text_features) {
119 *found_signup_text_features = FindTextFeaturesForClass(
120 element, kSignupTextFeatures, kNumberOfSignupFeatures);
121 }
122 }
123
124 // Returns true if |element| has type "button" or "image".
125 bool IsButtonOrImageElement(const WebFormControlElement& element) {
126 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button"));
127 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image"));
128
129 return element.formControlType() == kButton ||
130 element.formControlType() == kImage;
131 }
132
133 // Returns true if |element| has type "submit".
134 bool IsSubmitElement(const WebFormControlElement& element) {
135 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit"));
136
137 return element.formControlType() == kSubmit;
138 }
139
140 // Returns true if |element| has type "hidden";
141 bool IsHiddenElement(const WebFormControlElement& element) {
142 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
143
144 return element.formControlType() == kHidden;
145 }
146
147 // Returns true if |element| has type "select-multiple" or "select-one".
148 bool IsSelectElement(const WebFormControlElement& element) {
149 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
150 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple"));
151
152 return element.formControlType() == kSelectOne ||
153 element.formControlType() == kSelectMultiple;
154 }
155
156 // Return true if |form| contains at least one visible password element.
157 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) {
158 WebVector<WebFormControlElement> control_elements;
159 form.getFormControlElements(control_elements);
160 for (auto& control_element : control_elements) {
161 const WebInputElement* input_element = toWebInputElement(&control_element);
162 if (!input_element)
163 continue;
164 if (input_element->isPasswordField() &&
165 form_util::IsWebNodeVisible(*input_element)) {
166 return true;
167 }
168 }
169 return false;
170 }
171
172 // Finds text features in |form| (only <form>'s attribute values, but not its
173 // descendants) and its ancestors (only attribute values of the parental nodes,
174 // but not the whole tree of the ancestors).
175 // Sets |found_signin_text_features| and |found_signup_text_features| to true,
176 // if corresponding features are found.
177 void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form,
178 bool* found_signin_text_features,
179 bool* found_signup_text_features) {
180 CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input"));
181
182 DCHECK(found_signin_text_features);
183 DCHECK(found_signup_text_features);
184
185 unsigned number_of_inputs = form.getElementsByHTMLTagName(kInput).length();
186 blink::WebNode parent = form;
187 for (; !parent.isNull();) {
188 if (parent.isElementNode()) {
189 blink::WebElement element = parent.toConst<blink::WebElement>();
190 bool has_signin_feature = FindTextFeaturesForClass(
191 element, kSigninTextFeatures, kNumberOfSigninFeatures);
192 bool has_signup_feature = FindTextFeaturesForClass(
193 element, kSignupTextFeatures, kNumberOfSignupFeatures);
194 if (has_signin_feature && has_signup_feature)
195 break;
196 // If this ancestor contains more input elements, the ancestor might
197 // include another form. So, ignore the feature.
198 if (parent.getElementsByHTMLTagName(kInput).length() > number_of_inputs)
199 break;
200 if (has_signin_feature) {
201 *found_signin_text_features = true;
202 break;
203 }
204 if (has_signup_feature) {
205 *found_signup_text_features = true;
206 break;
207 }
208 }
209 parent = parent.parentNode();
210 }
211 }
212
213 } // namespace
214
215 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form,
216 base::string16* generation_field) {
217 DCHECK(generation_field);
218
219 if (form.isNull())
220 return false;
221
222 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form);
223
224 bool found_signin_text_features = false;
225 bool found_signup_text_features = false;
226 size_t number_of_text_input_fields = 0;
227 size_t number_of_password_input_fields = 0;
228 size_t number_of_checkbox_input_fields = 0;
229 size_t number_of_other_input_fields = 0;
230 bool found_captcha =
231 FindCaptchaInImgElements(form, ignore_invisible_elements);
232
233 FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features,
234 &found_signup_text_features);
235
236 std::vector<WebInputElement> passwords;
237 WebVector<WebFormControlElement> control_elements;
238 form.getFormControlElements(control_elements);
239
240 for (const WebFormControlElement& control_element : control_elements) {
241 if (IsHiddenElement(control_element))
242 continue;
243 if (ignore_invisible_elements) {
244 bool element_is_invisible = !form_util::IsWebNodeVisible(control_element);
vabr (Chromium) 2016/06/13 16:01:57 nit: Shorter form is: if (!form_util::IsWebNodeVis
kolos1 2016/06/15 14:15:15 Done.
245 if (element_is_invisible)
246 continue;
247 }
248
249 // If type="button" or "image", skip them, because it might be a link
250 // to another form.
251 if (IsButtonOrImageElement(control_element))
252 continue;
253
254 FindTextFeaturesInElement(control_element, &found_signin_text_features,
255 &found_signup_text_features);
256
257 // Since <select> is not WebInputElement, but WebSelectElement, process
258 // them as a special case.
259 if (IsSelectElement(control_element)) {
260 number_of_other_input_fields++;
261 } else {
262 const WebInputElement* input_element =
263 toWebInputElement(&control_element);
264 if (!input_element)
265 continue;
266
267 if (input_element->isTextField()) {
268 if (input_element->isPasswordField()) {
269 ++number_of_password_input_fields;
270 passwords.push_back(*input_element);
271 } else {
272 ++number_of_text_input_fields;
273 found_captcha = found_captcha || IsCaptchaInput(*input_element);
274 }
275 } else { // Non-text fields.
276 if (input_element->isCheckbox())
277 ++number_of_checkbox_input_fields;
278 else if (!IsSubmitElement(*input_element))
279 ++number_of_other_input_fields;
280 }
281 }
282 }
283
284 if (number_of_password_input_fields == 0 ||
285 number_of_password_input_fields > 3)
286 return false;
287
288 if ((number_of_text_input_fields - found_captcha >=
289 MINIMAL_NUMBER_OF_TEXT_FIELDS ||
290 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS ||
291 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS ||
292 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) ||
293 (found_signup_text_features && !found_signin_text_features)) {
294 WebInputElement password_creation_field;
295
296 // TODO(crbug.com/618309): Improve local classifier to distinguish password
297 // creation and password usage fields on the change password forms.
298 if (passwords.size() == 3)
299 password_creation_field = passwords[1];
300 else
301 password_creation_field = passwords[0];
302
303 *generation_field = password_creation_field.nameForAutofill();
304 return true;
305 }
306 return false;
307 }
308 }
OLDNEW
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698