OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/autofill/content/renderer/form_classifier.h" | |
6 | |
7 #include <algorithm> | |
8 | |
9 #include "base/strings/string_util.h" | |
vabr (Chromium)
2016/06/10 13:22:12
Please also #include "base/string16.h".
kolos1
2016/06/13 14:27:34
Done.
| |
10 #include "components/autofill/content/renderer/form_autofill_util.h" | |
11 #include "third_party/WebKit/public/platform/WebString.h" | |
12 #include "third_party/WebKit/public/platform/WebVector.h" | |
13 #include "third_party/WebKit/public/web/WebFormControlElement.h" | |
14 #include "third_party/WebKit/public/web/WebInputElement.h" | |
15 | |
16 using autofill::form_util::WebFormControlElementToFormField; | |
17 using blink::WebFormControlElement; | |
18 using blink::WebInputElement; | |
19 using blink::WebString; | |
20 using blink::WebVector; | |
21 | |
22 namespace autofill { | |
23 | |
24 namespace { | |
25 | |
26 // The words that frequently appear in attribute values of signin forms. | |
27 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"}; | |
28 const int kNumberOfSigninFeatures = arraysize(kSigninTextFeatures); | |
vabr (Chromium)
2016/06/10 13:22:11
Please #include "base/macros.h" for arraysize.
vabr (Chromium)
2016/06/10 13:22:12
Please use constexpr here and with the constants b
vabr (Chromium)
2016/06/10 13:22:12
Please use size_t instead of int. Unlike int, size
kolos1
2016/06/13 14:27:34
Done.
kolos1
2016/06/13 14:27:34
Done.
kolos1
2016/06/13 14:27:34
Done.
| |
29 | |
30 // The words that frequently appear in attribute values of signup forms. | |
31 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"}; | |
32 const int kNumberOfSignupFeatures = arraysize(kSignupTextFeatures); | |
33 | |
34 // The words that frequently appear in attribute values of captcha elements. | |
35 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"}; | |
36 const int kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures); | |
37 | |
38 // The characters that should be removed from attribute values. | |
39 const char kCharactersToBeRemoved[] = {'-', '_'}; | |
40 | |
41 // Minimal number of input fields to detect signup/change password form. | |
vabr (Chromium)
2016/06/10 13:22:11
The comment does not make it clear how the minimal
kolos1
2016/06/13 14:27:34
Fixed the comment.
| |
42 const size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2; | |
43 const size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2; | |
44 const size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3; | |
45 const size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2; | |
46 | |
// Erases every occurrence of the character |c| from |*str| in place. The
// relative order of the remaining characters is preserved.
void RemoveAllOccurrencesOfCharacter(std::string* str, char c) {
  // std::remove() compacts the kept characters to the front and returns the
  // start of the leftover tail, which is then cut off.
  const std::string::iterator tail_begin =
      std::remove(str->begin(), str->end(), c);
  str->erase(tail_begin, str->end());
}
52 | |
53 // Find |features| in |element|'s attribute values. Returns true if at least one | |
54 // text feature was found. | |
55 bool FindTextFeaturesForClass(const blink::WebElement& element, | |
56 const char* const features[], | |
57 size_t number_of_features) { | |
58 for (unsigned i = 0; i < element.attributeCount(); ++i) { | |
59 std::string filtered_value = | |
60 base::ToLowerASCII(element.attributeValue(i).utf8()); | |
61 for (char d : kCharactersToBeRemoved) | |
dvadym
2016/06/09 12:40:59
Can we use regexp for removing symbols?
kolos1
2016/06/10 12:18:08
Done.
| |
62 RemoveAllOccurrencesOfCharacter(&filtered_value, d); | |
63 if (filtered_value.empty()) | |
64 continue; | |
65 for (size_t j = 0; j < number_of_features; j++) { | |
vabr (Chromium)
2016/06/10 13:22:12
nit: ++j
(Let's be consistent and use prefix incre
kolos1
2016/06/13 14:27:34
Done.
| |
66 if (filtered_value.find(features[j]) != std::string::npos) | |
67 return true; | |
68 } | |
69 } | |
70 return false; | |
71 } | |
72 | |
73 // Returns true if at least one captcha feature was found in |element|'s | |
74 // attribute values. | |
75 bool IsCaptchaInput(const blink::WebInputElement& element) { | |
76 return FindTextFeaturesForClass(element, kCaptchaFeatures, | |
77 kNumberOfCaptchaFeatures); | |
78 } | |
79 | |
80 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha | |
81 // text features. Returns true, if at least one occurrence was found. | |
82 bool FindCaptchaInImgElements(const blink::WebElement& form, | |
83 bool ingnore_invisible) { | |
84 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img")); | |
85 | |
86 blink::WebElementCollection img_elements = | |
87 form.getElementsByHTMLTagName(kImageTag); | |
88 for (blink::WebElement element = img_elements.firstItem(); !element.isNull(); | |
89 element = img_elements.nextItem()) { | |
90 if (ingnore_invisible && !form_util::IsWebNodeVisible(element)) | |
91 continue; | |
92 if (FindTextFeaturesForClass(element, kCaptchaFeatures, | |
93 kNumberOfCaptchaFeatures)) | |
94 return true; | |
95 } | |
96 return false; | |
97 } | |
98 | |
99 // Finds signin and signup features in |element|'s attribute values. Sets to | |
100 // true |found_signin_text_features| or |found_signup_text_features| if | |
101 // appropriate features were found. | |
102 void FindTextFeaturesInElement(bool* found_signin_text_features, | |
dvadym
2016/06/09 12:40:59
Input arguments should be before output arguments
kolos1
2016/06/10 12:18:08
Done.
| |
103 bool* found_signup_text_features, | |
dvadym
2016/06/09 12:40:59
Could you please add DCHECK for found_*_text_featu
kolos1
2016/06/10 12:18:08
Done.
| |
104 const blink::WebElement& element) { | |
105 if (!found_signin_text_features) { | |
106 *found_signin_text_features = FindTextFeaturesForClass( | |
107 element, kSigninTextFeatures, kNumberOfSigninFeatures); | |
108 } | |
109 if (!found_signup_text_features) { | |
110 *found_signup_text_features = FindTextFeaturesForClass( | |
111 element, kSignupTextFeatures, kNumberOfSignupFeatures); | |
112 } | |
113 } | |
114 | |
115 // Returns true if |element| has type "button" or "image". | |
116 bool IsButtonOrImageElement(const WebFormControlElement& element) { | |
117 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button")); | |
dvadym
2016/06/09 12:40:59
All input type names const are in InputTypeNames.h
kolos1
2016/06/10 12:18:08
I'm not sure we could use this file. As I understa
dvadym
2016/06/10 14:12:09
Acknowledged
| |
118 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image")); | |
119 | |
120 return element.formControlType() == kButton || | |
121 element.formControlType() == kImage; | |
122 } | |
123 | |
124 // Returns true if |element| has type "submit". | |
125 bool IsSubmitElement(const WebFormControlElement& element) { | |
126 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit")); | |
127 | |
128 return element.formControlType() == kSubmit; | |
129 } | |
130 | |
131 // Returns true if |element| has type "hidden"; | |
132 bool IsHiddenElement(const WebFormControlElement& element) { | |
133 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); | |
134 | |
135 return element.formControlType() == kHidden; | |
136 } | |
137 | |
138 // Returns true if |element| has type "select-multiple" or "select-one". | |
139 bool IsSelectElement(const WebFormControlElement& element) { | |
140 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); | |
141 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple")); | |
142 | |
143 return element.formControlType() == kSelectOne || | |
144 element.formControlType() == kSelectMultiple; | |
145 } | |
146 | |
147 // Return true if |form| contains at least one visible password element. | |
148 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) { | |
149 WebVector<WebFormControlElement> control_elements; | |
150 form.getFormControlElements(control_elements); | |
151 for (auto& control_element : control_elements) { | |
152 const WebInputElement* input_element = toWebInputElement(&control_element); | |
153 if (!input_element) | |
154 continue; | |
155 if (input_element->isPasswordField() && | |
156 form_util::IsWebNodeVisible(*input_element)) | |
157 return true; | |
158 } | |
159 return false; | |
160 } | |
161 | |
162 // Finds text features in <form> tag of |form| and its ancestors. | |
vabr (Chromium)
2016/06/10 13:22:12
nit: The comment sounds not completely correct. Wh
kolos1
2016/06/13 14:27:34
Fixed the comment.
| |
163 // Sets |found_signin_text_features| and |found_signup_text_features| to true, | |
164 // if corresponding features are found. | |
165 void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form, | |
dvadym
2016/06/09 12:40:59
The same comment as to FindTextFeaturesInElement a
kolos1
2016/06/10 12:18:08
Done.
| |
166 bool* found_signin_text_features, | |
167 bool* found_signup_text_features) { | |
168 CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input")); | |
169 | |
170 unsigned number_of_inputs = form.getElementsByHTMLTagName(kInput).length(); | |
171 blink::WebNode parent = form; | |
172 for (; !parent.isNull();) { | |
dvadym
2016/06/09 12:40:59
I'm concerning a little bit in performance of this
kolos1
2016/06/10 12:18:08
I will ask to review Webkit specialist. Thanks.
| |
173 if (parent.getElementsByHTMLTagName(kInput).length() > number_of_inputs) | |
vabr (Chromium)
2016/06/10 13:22:11
+1 to Vadym's worry about performance. This line i
dvadym
2016/06/10 14:12:09
Yeah, perfomance impact is not clear. Independentl
kolos1
2016/06/13 14:27:34
I reduced the number of calls "getElementsByHTMLTa
| |
174 break; | |
175 if (parent.isElementNode()) { | |
176 blink::WebElement element = parent.toConst<blink::WebElement>(); | |
177 bool has_signin_feature = FindTextFeaturesForClass( | |
178 element, kSigninTextFeatures, kNumberOfSigninFeatures); | |
179 bool has_signup_feature = FindTextFeaturesForClass( | |
180 element, kSignupTextFeatures, kNumberOfSignupFeatures); | |
181 if (has_signin_feature && has_signup_feature) | |
182 break; | |
183 if (has_signin_feature) { | |
184 *found_signin_text_features = true; | |
185 break; | |
186 } | |
187 if (has_signup_feature) { | |
188 *found_signup_text_features = true; | |
189 break; | |
190 } | |
191 } | |
192 parent = parent.parentNode(); | |
193 } | |
194 } | |
195 | |
196 } // namespace | |
197 | |
198 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form, | |
199 base::string16* generation_field) { | |
200 DCHECK(generation_field); | |
201 | |
202 if (form.isNull()) | |
203 return false; | |
204 | |
205 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form); | |
206 | |
207 bool found_signin_text_features = false; | |
208 bool found_signup_text_features = false; | |
209 size_t number_of_text_input_fields = 0; | |
210 size_t number_of_password_input_fields = 0; | |
211 size_t number_of_checkbox_input_fields = 0; | |
212 size_t number_of_other_input_fields = 0; | |
213 bool found_captcha = | |
214 FindCaptchaInImgElements(form, ignore_invisible_elements); | |
215 | |
216 FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features, | |
217 &found_signup_text_features); | |
218 | |
219 std::vector<WebInputElement> passwords; | |
220 WebVector<WebFormControlElement> control_elements; | |
221 form.getFormControlElements(control_elements); | |
222 | |
223 for (size_t i = 0; i < control_elements.size(); ++i) { | |
vabr (Chromium)
2016/06/10 13:22:11
nit: Compress the first two line into:
for (const
kolos1
2016/06/13 14:27:34
Done.
| |
224 WebFormControlElement control_element = control_elements[i]; | |
225 bool element_is_invisible = !form_util::IsWebNodeVisible(control_element); | |
vabr (Chromium)
2016/06/10 13:22:12
IsWebNodeVisible might be expensive, please only c
kolos1
2016/06/13 14:27:34
Done.
| |
226 if ((element_is_invisible && ignore_invisible_elements) || | |
227 IsHiddenElement(control_element)) | |
vabr (Chromium)
2016/06/10 13:22:11
nit: "if" statements with more than 2 lines in tot
kolos1
2016/06/13 14:27:34
Done.
| |
228 continue; | |
229 | |
230 // If type="button" or "image", skip them, because it might be a link | |
231 // to another form. | |
232 if (IsButtonOrImageElement(control_element)) | |
233 continue; | |
234 | |
235 FindTextFeaturesInElement(&found_signin_text_features, | |
236 &found_signup_text_features, control_element); | |
237 | |
238 // Since <select> is not WebInputElement, but WebSelectElement, process | |
239 // them as a special case. | |
240 if (IsSelectElement(control_element)) { | |
241 number_of_other_input_fields++; | |
vabr (Chromium)
2016/06/10 13:22:12
nit: Here and below: please use prefix ++ unless y
kolos1
2016/06/13 14:27:34
Done.
| |
242 } else { | |
243 WebInputElement* input_element = toWebInputElement(&control_element); | |
244 if (!input_element) | |
245 continue; | |
246 | |
247 if (input_element->isTextField()) { | |
248 if (input_element->isPasswordField()) { | |
249 number_of_password_input_fields++; | |
250 passwords.push_back(*input_element); | |
251 } else { | |
252 number_of_text_input_fields++; | |
253 found_captcha = found_captcha || IsCaptchaInput(*input_element); | |
254 } | |
255 } else { // Non-text fields. | |
256 if (input_element->isCheckbox()) | |
257 number_of_checkbox_input_fields++; | |
258 else if (!IsSubmitElement(*input_element)) | |
259 number_of_other_input_fields++; | |
260 } | |
261 } | |
262 } | |
263 | |
264 if (number_of_password_input_fields == 0) | |
265 return false; | |
266 | |
267 if ((number_of_text_input_fields - found_captcha >= | |
268 MINIMAL_NUMBER_OF_TEXT_FIELDS || | |
269 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS || | |
270 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS || | |
271 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) || | |
272 (found_signup_text_features && !found_signin_text_features)) { | |
273 WebInputElement password_creation_field; | |
274 | |
275 // TODO(crbug.com/618309): Improve local classifier to distinguish password | |
276 // creation and password usage fields on the change password forms. | |
277 if (passwords.size() == 3) | |
278 password_creation_field = passwords[1]; | |
279 else | |
280 password_creation_field = passwords[0]; | |
dvadym
2016/06/09 12:40:59
What's about case when passwords.size() > 3? It co
kolos1
2016/06/10 12:18:08
I saw one site where there were 4 password fields
| |
281 | |
282 *generation_field = password_creation_field.nameForAutofill(); | |
283 return true; | |
284 } | |
285 return false; | |
286 } | |
287 } | |
OLD | NEW |