Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(498)

Side by Side Diff: components/autofill/content/renderer/form_classifier.cc

Issue 1883183002: [Password Manager] HTML parsing based client-side form type classifier (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Changed the signature of ClassifyFormAndFindGenerationField Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/content/renderer/form_classifier.h"
6
7 #include <algorithm>
8
9 #include "base/strings/string_util.h"
vabr (Chromium) 2016/06/10 13:22:12 Please also #include "base/string16.h".
kolos1 2016/06/13 14:27:34 Done.
10 #include "components/autofill/content/renderer/form_autofill_util.h"
11 #include "third_party/WebKit/public/platform/WebString.h"
12 #include "third_party/WebKit/public/platform/WebVector.h"
13 #include "third_party/WebKit/public/web/WebFormControlElement.h"
14 #include "third_party/WebKit/public/web/WebInputElement.h"
15
16 using autofill::form_util::WebFormControlElementToFormField;
17 using blink::WebFormControlElement;
18 using blink::WebInputElement;
19 using blink::WebString;
20 using blink::WebVector;
21
22 namespace autofill {
23
24 namespace {
25
26 // The words that frequently appear in attribute values of signin forms.
27 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"};
28 const int kNumberOfSigninFeatures = arraysize(kSigninTextFeatures);
vabr (Chromium) 2016/06/10 13:22:11 Please #include "base/macros.h" for arraysize.
vabr (Chromium) 2016/06/10 13:22:12 Please use constexpr here and with the constants b
vabr (Chromium) 2016/06/10 13:22:12 Please use size_t instead of int. Unlike int, size
kolos1 2016/06/13 14:27:34 Done.
kolos1 2016/06/13 14:27:34 Done.
kolos1 2016/06/13 14:27:34 Done.
29
30 // The words that frequently appear in attribute values of signup forms.
31 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"};
32 const int kNumberOfSignupFeatures = arraysize(kSignupTextFeatures);
33
34 // The words that frequently appear in attribute values of captcha elements.
35 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"};
36 const int kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures);
37
// The characters that are removed from attribute values before the values are
// searched for text features. This is a plain character array (not a
// NUL-terminated string); it is iterated element by element.
constexpr char kCharactersToBeRemoved[] = {'-', '_'};
40
// Minimal field counts used to detect a signup/change password form. A form is
// treated as such a form when the number of fields of at least one kind below
// reaches the corresponding minimum (text fields are counted without a detected
// captcha field). Reaching any single minimum is sufficient.
constexpr size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2;
constexpr size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2;
constexpr size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3;
constexpr size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2;
46
// Strips every occurrence of the character |c| out of |*str|, in place. The
// relative order of the remaining characters is preserved.
void RemoveAllOccurrencesOfCharacter(std::string* str, char c) {
  std::string& text = *str;
  std::string::size_type kept = 0;
  // Compact the characters that survive towards the front of the string.
  for (std::string::size_type pos = 0; pos < text.size(); ++pos) {
    if (text[pos] != c)
      text[kept++] = text[pos];
  }
  // Drop the now-unused tail.
  text.resize(kept);
}
52
53 // Find |features| in |element|'s attribute values. Returns true if at least one
54 // text feature was found.
55 bool FindTextFeaturesForClass(const blink::WebElement& element,
56 const char* const features[],
57 size_t number_of_features) {
58 for (unsigned i = 0; i < element.attributeCount(); ++i) {
59 std::string filtered_value =
60 base::ToLowerASCII(element.attributeValue(i).utf8());
61 for (char d : kCharactersToBeRemoved)
dvadym 2016/06/09 12:40:59 Can we use regexp for removing symbols?
kolos1 2016/06/10 12:18:08 Done.
62 RemoveAllOccurrencesOfCharacter(&filtered_value, d);
63 if (filtered_value.empty())
64 continue;
65 for (size_t j = 0; j < number_of_features; j++) {
vabr (Chromium) 2016/06/10 13:22:12 nit: ++j (Let's be consistent and use prefix incre
kolos1 2016/06/13 14:27:34 Done.
66 if (filtered_value.find(features[j]) != std::string::npos)
67 return true;
68 }
69 }
70 return false;
71 }
72
73 // Returns true if at least one captcha feature was found in |element|'s
74 // attribute values.
75 bool IsCaptchaInput(const blink::WebInputElement& element) {
76 return FindTextFeaturesForClass(element, kCaptchaFeatures,
77 kNumberOfCaptchaFeatures);
78 }
79
80 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha
81 // text features. Returns true, if at least one occurrence was found.
82 bool FindCaptchaInImgElements(const blink::WebElement& form,
83 bool ingnore_invisible) {
84 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img"));
85
86 blink::WebElementCollection img_elements =
87 form.getElementsByHTMLTagName(kImageTag);
88 for (blink::WebElement element = img_elements.firstItem(); !element.isNull();
89 element = img_elements.nextItem()) {
90 if (ingnore_invisible && !form_util::IsWebNodeVisible(element))
91 continue;
92 if (FindTextFeaturesForClass(element, kCaptchaFeatures,
93 kNumberOfCaptchaFeatures))
94 return true;
95 }
96 return false;
97 }
98
99 // Finds signin and signup features in |element|'s attribute values. Sets to
100 // true |found_signin_text_features| or |found_signup_text_features| if
101 // appropriate features were found.
102 void FindTextFeaturesInElement(bool* found_signin_text_features,
dvadym 2016/06/09 12:40:59 Input arguments should be before output arguments
kolos1 2016/06/10 12:18:08 Done.
103 bool* found_signup_text_features,
dvadym 2016/06/09 12:40:59 Could you please add DCHECK for found_*_text_featu
kolos1 2016/06/10 12:18:08 Done.
104 const blink::WebElement& element) {
105 if (!found_signin_text_features) {
106 *found_signin_text_features = FindTextFeaturesForClass(
107 element, kSigninTextFeatures, kNumberOfSigninFeatures);
108 }
109 if (!found_signup_text_features) {
110 *found_signup_text_features = FindTextFeaturesForClass(
111 element, kSignupTextFeatures, kNumberOfSignupFeatures);
112 }
113 }
114
115 // Returns true if |element| has type "button" or "image".
116 bool IsButtonOrImageElement(const WebFormControlElement& element) {
117 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button"));
dvadym 2016/06/09 12:40:59 All input type names const are in InputTypeNames.h
kolos1 2016/06/10 12:18:08 I'm not sure we could use this file. As I understa
dvadym 2016/06/10 14:12:09 Acknowledged
118 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image"));
119
120 return element.formControlType() == kButton ||
121 element.formControlType() == kImage;
122 }
123
124 // Returns true if |element| has type "submit".
125 bool IsSubmitElement(const WebFormControlElement& element) {
126 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit"));
127
128 return element.formControlType() == kSubmit;
129 }
130
131 // Returns true if |element| has type "hidden";
132 bool IsHiddenElement(const WebFormControlElement& element) {
133 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
134
135 return element.formControlType() == kHidden;
136 }
137
138 // Returns true if |element| has type "select-multiple" or "select-one".
139 bool IsSelectElement(const WebFormControlElement& element) {
140 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
141 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple"));
142
143 return element.formControlType() == kSelectOne ||
144 element.formControlType() == kSelectMultiple;
145 }
146
147 // Return true if |form| contains at least one visible password element.
148 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) {
149 WebVector<WebFormControlElement> control_elements;
150 form.getFormControlElements(control_elements);
151 for (auto& control_element : control_elements) {
152 const WebInputElement* input_element = toWebInputElement(&control_element);
153 if (!input_element)
154 continue;
155 if (input_element->isPasswordField() &&
156 form_util::IsWebNodeVisible(*input_element))
157 return true;
158 }
159 return false;
160 }
161
162 // Finds text features in <form> tag of |form| and its ancestors.
vabr (Chromium) 2016/06/10 13:22:12 nit: The comment sounds not completely correct. Wh
kolos1 2016/06/13 14:27:34 Fixed the comment.
163 // Sets |found_signin_text_features| and |found_signup_text_features| to true,
164 // if corresponding features are found.
165 void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form,
dvadym 2016/06/09 12:40:59 The same comment as to FindTextFeaturesInElement a
kolos1 2016/06/10 12:18:08 Done.
166 bool* found_signin_text_features,
167 bool* found_signup_text_features) {
168 CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input"));
169
170 unsigned number_of_inputs = form.getElementsByHTMLTagName(kInput).length();
171 blink::WebNode parent = form;
172 for (; !parent.isNull();) {
dvadym 2016/06/09 12:40:59 I'm concerning a little bit in performance of this
kolos1 2016/06/10 12:18:08 I will ask to review Webkit specialist. Thanks.
173 if (parent.getElementsByHTMLTagName(kInput).length() > number_of_inputs)
vabr (Chromium) 2016/06/10 13:22:11 +1 to Vadym's worry about performance. This line i
dvadym 2016/06/10 14:12:09 Yeah, perfomance impact is not clear. Independentl
kolos1 2016/06/13 14:27:34 I reduced the number of calls "getElementsByHTMLTa
174 break;
175 if (parent.isElementNode()) {
176 blink::WebElement element = parent.toConst<blink::WebElement>();
177 bool has_signin_feature = FindTextFeaturesForClass(
178 element, kSigninTextFeatures, kNumberOfSigninFeatures);
179 bool has_signup_feature = FindTextFeaturesForClass(
180 element, kSignupTextFeatures, kNumberOfSignupFeatures);
181 if (has_signin_feature && has_signup_feature)
182 break;
183 if (has_signin_feature) {
184 *found_signin_text_features = true;
185 break;
186 }
187 if (has_signup_feature) {
188 *found_signup_text_features = true;
189 break;
190 }
191 }
192 parent = parent.parentNode();
193 }
194 }
195
196 } // namespace
197
198 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form,
199 base::string16* generation_field) {
200 DCHECK(generation_field);
201
202 if (form.isNull())
203 return false;
204
205 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form);
206
207 bool found_signin_text_features = false;
208 bool found_signup_text_features = false;
209 size_t number_of_text_input_fields = 0;
210 size_t number_of_password_input_fields = 0;
211 size_t number_of_checkbox_input_fields = 0;
212 size_t number_of_other_input_fields = 0;
213 bool found_captcha =
214 FindCaptchaInImgElements(form, ignore_invisible_elements);
215
216 FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features,
217 &found_signup_text_features);
218
219 std::vector<WebInputElement> passwords;
220 WebVector<WebFormControlElement> control_elements;
221 form.getFormControlElements(control_elements);
222
223 for (size_t i = 0; i < control_elements.size(); ++i) {
vabr (Chromium) 2016/06/10 13:22:11 nit: Compress the first two line into: for (const
kolos1 2016/06/13 14:27:34 Done.
224 WebFormControlElement control_element = control_elements[i];
225 bool element_is_invisible = !form_util::IsWebNodeVisible(control_element);
vabr (Chromium) 2016/06/10 13:22:12 IsWebNodeVisible might be expensive, please only c
kolos1 2016/06/13 14:27:34 Done.
226 if ((element_is_invisible && ignore_invisible_elements) ||
227 IsHiddenElement(control_element))
vabr (Chromium) 2016/06/10 13:22:11 nit: "if" statements with more than 2 lines in tot
kolos1 2016/06/13 14:27:34 Done.
228 continue;
229
230 // If type="button" or "image", skip them, because it might be a link
231 // to another form.
232 if (IsButtonOrImageElement(control_element))
233 continue;
234
235 FindTextFeaturesInElement(&found_signin_text_features,
236 &found_signup_text_features, control_element);
237
238 // Since <select> is not WebInputElement, but WebSelectElement, process
239 // them as a special case.
240 if (IsSelectElement(control_element)) {
241 number_of_other_input_fields++;
vabr (Chromium) 2016/06/10 13:22:12 nit: Here and below: please use prefix ++ unless y
kolos1 2016/06/13 14:27:34 Done.
242 } else {
243 WebInputElement* input_element = toWebInputElement(&control_element);
244 if (!input_element)
245 continue;
246
247 if (input_element->isTextField()) {
248 if (input_element->isPasswordField()) {
249 number_of_password_input_fields++;
250 passwords.push_back(*input_element);
251 } else {
252 number_of_text_input_fields++;
253 found_captcha = found_captcha || IsCaptchaInput(*input_element);
254 }
255 } else { // Non-text fields.
256 if (input_element->isCheckbox())
257 number_of_checkbox_input_fields++;
258 else if (!IsSubmitElement(*input_element))
259 number_of_other_input_fields++;
260 }
261 }
262 }
263
264 if (number_of_password_input_fields == 0)
265 return false;
266
267 if ((number_of_text_input_fields - found_captcha >=
268 MINIMAL_NUMBER_OF_TEXT_FIELDS ||
269 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS ||
270 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS ||
271 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) ||
272 (found_signup_text_features && !found_signin_text_features)) {
273 WebInputElement password_creation_field;
274
275 // TODO(crbug.com/618309): Improve local classifier to distinguish password
276 // creation and password usage fields on the change password forms.
277 if (passwords.size() == 3)
278 password_creation_field = passwords[1];
279 else
280 password_creation_field = passwords[0];
dvadym 2016/06/09 12:40:59 What's about case when passwords.size() > 3? It co
kolos1 2016/06/10 12:18:08 I saw one site where there were 4 password fields
281
282 *generation_field = password_creation_field.nameForAutofill();
283 return true;
284 }
285 return false;
286 }
287 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698