OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/autofill/content/renderer/form_classifier.h" | |
6 | |
7 #include <algorithm> | |
8 | |
9 #include "base/macros.h" | |
10 #include "base/strings/string16.h" | |
11 #include "base/strings/string_util.h" | |
12 #include "components/autofill/content/renderer/form_autofill_util.h" | |
13 #include "third_party/WebKit/public/platform/WebString.h" | |
14 #include "third_party/WebKit/public/platform/WebVector.h" | |
15 #include "third_party/WebKit/public/web/WebFormControlElement.h" | |
16 #include "third_party/WebKit/public/web/WebInputElement.h" | |
17 #include "third_party/re2/src/re2/re2.h" | |
18 | |
19 using autofill::form_util::WebFormControlElementToFormField; | |
20 using blink::WebFormControlElement; | |
21 using blink::WebInputElement; | |
22 using blink::WebString; | |
23 using blink::WebVector; | |
24 | |
25 namespace autofill { | |
26 | |
27 namespace { | |
28 | |
29 // The words that frequently appear in attribute values of signin forms. | |
30 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"}; | |
31 constexpr size_t kNumberOfSigninFeatures = arraysize(kSigninTextFeatures); | |
32 | |
33 // The words that frequently appear in attribute values of signup forms. | |
34 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"}; | |
35 constexpr size_t kNumberOfSignupFeatures = arraysize(kSignupTextFeatures); | |
36 | |
37 // The words that frequently appear in attribute values of captcha elements. | |
38 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"}; | |
39 constexpr size_t kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures); | |
40 | |
41 // Minimal number of input fields to classify form as signup or change password | |
42 // form. If at least one of the listed thresholds is reached or exceeded, the | |
43 // form is classified as a form where password generation should be available. | |
44 constexpr size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2; | |
45 constexpr size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2; | |
46 constexpr size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3; | |
47 constexpr size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2; | |
48 | |
// Strips every '-' and '_' from |value| in place. All remaining characters
// keep their relative order.
void ClearAttributeValue(std::string* value) {
  const auto is_separator = [](char c) { return c == '-' || c == '_'; };
  const auto first_removed =
      std::remove_if(value->begin(), value->end(), is_separator);
  value->erase(first_removed, value->end());
}
55 | |
56 // Find |features| in |element|'s attribute values. Returns true if at least one | |
57 // text feature was found. | |
58 bool FindTextFeaturesForClass(const blink::WebElement& element, | |
59 const char* const features[], | |
60 size_t number_of_features) { | |
61 DCHECK(features); | |
62 | |
63 for (unsigned i = 0; i < element.attributeCount(); ++i) { | |
64 std::string filtered_value = | |
65 base::ToLowerASCII(element.attributeValue(i).utf8()); | |
66 ClearAttributeValue(&filtered_value); | |
67 | |
68 if (filtered_value.empty()) | |
69 continue; | |
70 for (size_t j = 0; j < number_of_features; ++j) { | |
71 if (filtered_value.find(features[j]) != std::string::npos) | |
72 return true; | |
73 } | |
74 } | |
75 return false; | |
76 } | |
77 | |
78 // Returns true if at least one captcha feature was found in |element|'s | |
79 // attribute values. | |
80 bool IsCaptchaInput(const blink::WebInputElement& element) { | |
81 return FindTextFeaturesForClass(element, kCaptchaFeatures, | |
82 kNumberOfCaptchaFeatures); | |
83 } | |
84 | |
85 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha | |
86 // text features. Returns true, if at least one occurrence was found. | |
87 bool FindCaptchaInImgElements(const blink::WebElement& form, | |
88 bool ingnore_invisible) { | |
89 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img")); | |
90 | |
91 blink::WebElementCollection img_elements = | |
92 form.getElementsByHTMLTagName(kImageTag); | |
93 for (blink::WebElement element = img_elements.firstItem(); !element.isNull(); | |
94 element = img_elements.nextItem()) { | |
95 if (ingnore_invisible && !form_util::IsWebNodeVisible(element)) | |
96 continue; | |
97 if (FindTextFeaturesForClass(element, kCaptchaFeatures, | |
98 kNumberOfCaptchaFeatures)) { | |
99 return true; | |
100 } | |
101 } | |
102 return false; | |
103 } | |
104 | |
105 // Finds signin and signup features in |element|'s attribute values. Sets to | |
106 // true |found_signin_text_features| or |found_signup_text_features| if | |
107 // appropriate features were found. | |
108 void FindTextFeaturesInElement(const blink::WebElement& element, | |
109 bool* found_signin_text_features, | |
110 bool* found_signup_text_features) { | |
111 DCHECK(found_signin_text_features); | |
112 DCHECK(found_signup_text_features); | |
113 | |
114 if (!*found_signin_text_features) { | |
115 *found_signin_text_features = FindTextFeaturesForClass( | |
116 element, kSigninTextFeatures, kNumberOfSigninFeatures); | |
117 } | |
118 if (!*found_signup_text_features) { | |
119 *found_signup_text_features = FindTextFeaturesForClass( | |
120 element, kSignupTextFeatures, kNumberOfSignupFeatures); | |
121 } | |
122 } | |
123 | |
124 // Returns true if |element| has type "button" or "image". | |
125 bool IsButtonOrImageElement(const WebFormControlElement& element) { | |
126 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button")); | |
127 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image")); | |
128 | |
129 return element.formControlType() == kButton || | |
130 element.formControlType() == kImage; | |
131 } | |
132 | |
133 // Returns true if |element| has type "submit". | |
134 bool IsSubmitElement(const WebFormControlElement& element) { | |
135 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit")); | |
136 | |
137 return element.formControlType() == kSubmit; | |
138 } | |
139 | |
140 // Returns true if |element| has type "hidden"; | |
141 bool IsHiddenElement(const WebFormControlElement& element) { | |
142 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); | |
143 | |
144 return element.formControlType() == kHidden; | |
145 } | |
146 | |
147 // Returns true if |element| has type "select-multiple" or "select-one". | |
148 bool IsSelectElement(const WebFormControlElement& element) { | |
149 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); | |
150 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple")); | |
151 | |
152 return element.formControlType() == kSelectOne || | |
153 element.formControlType() == kSelectMultiple; | |
154 } | |
155 | |
156 // Return true if |form| contains at least one visible password element. | |
157 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) { | |
158 WebVector<WebFormControlElement> control_elements; | |
159 form.getFormControlElements(control_elements); | |
160 for (auto& control_element : control_elements) { | |
161 const WebInputElement* input_element = toWebInputElement(&control_element); | |
162 if (!input_element) | |
163 continue; | |
164 if (input_element->isPasswordField() && | |
165 form_util::IsWebNodeVisible(*input_element)) { | |
166 return true; | |
167 } | |
168 } | |
169 return false; | |
170 } | |
171 | |
172 // Finds text features in |form| (only <form>'s attribute values, but not its | |
173 // descendants) and its ancestors (only attribute values of the parental nodes, | |
174 // but not the whole tree of the ancestors). | |
175 // Sets |found_signin_text_features| and |found_signup_text_features| to true, | |
176 // if corresponding features are found. | |
177 void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form, | |
178 bool* found_signin_text_features, | |
179 bool* found_signup_text_features) { | |
180 CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input")); | |
181 | |
182 DCHECK(found_signin_text_features); | |
183 DCHECK(found_signup_text_features); | |
184 | |
185 unsigned number_of_inputs = form.getElementsByHTMLTagName(kInput).length(); | |
186 blink::WebNode parent = form; | |
187 for (; !parent.isNull();) { | |
188 if (parent.isElementNode()) { | |
189 blink::WebElement element = parent.toConst<blink::WebElement>(); | |
190 bool has_signin_feature = FindTextFeaturesForClass( | |
191 element, kSigninTextFeatures, kNumberOfSigninFeatures); | |
192 bool has_signup_feature = FindTextFeaturesForClass( | |
193 element, kSignupTextFeatures, kNumberOfSignupFeatures); | |
194 if (has_signin_feature && has_signup_feature) | |
195 break; | |
196 // If this ancestor contains more input elements, the ancestor might | |
197 // include another form. So, ignore the feature. | |
198 if (parent.getElementsByHTMLTagName(kInput).length() > number_of_inputs) | |
199 break; | |
200 if (has_signin_feature) { | |
201 *found_signin_text_features = true; | |
202 break; | |
203 } | |
204 if (has_signup_feature) { | |
205 *found_signup_text_features = true; | |
206 break; | |
207 } | |
208 } | |
209 parent = parent.parentNode(); | |
210 } | |
211 } | |
212 | |
213 } // namespace | |
214 | |
215 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form, | |
216 base::string16* generation_field) { | |
217 DCHECK(generation_field); | |
218 | |
219 if (form.isNull()) | |
220 return false; | |
221 | |
222 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form); | |
223 | |
224 bool found_signin_text_features = false; | |
225 bool found_signup_text_features = false; | |
226 size_t number_of_text_input_fields = 0; | |
227 size_t number_of_password_input_fields = 0; | |
228 size_t number_of_checkbox_input_fields = 0; | |
229 size_t number_of_other_input_fields = 0; | |
230 bool found_captcha = | |
231 FindCaptchaInImgElements(form, ignore_invisible_elements); | |
232 | |
233 FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features, | |
234 &found_signup_text_features); | |
235 | |
236 std::vector<WebInputElement> passwords; | |
237 WebVector<WebFormControlElement> control_elements; | |
238 form.getFormControlElements(control_elements); | |
239 | |
240 for (const WebFormControlElement& control_element : control_elements) { | |
241 if (IsHiddenElement(control_element)) | |
242 continue; | |
243 if (ignore_invisible_elements) { | |
244 bool element_is_invisible = !form_util::IsWebNodeVisible(control_element); | |
vabr (Chromium)
2016/06/13 16:01:57
nit: Shorter form is:
if (!form_util::IsWebNodeVis
kolos1
2016/06/15 14:15:15
Done.
| |
245 if (element_is_invisible) | |
246 continue; | |
247 } | |
248 | |
249 // If type="button" or "image", skip them, because it might be a link | |
250 // to another form. | |
251 if (IsButtonOrImageElement(control_element)) | |
252 continue; | |
253 | |
254 FindTextFeaturesInElement(control_element, &found_signin_text_features, | |
255 &found_signup_text_features); | |
256 | |
257 // Since <select> is not WebInputElement, but WebSelectElement, process | |
258 // them as a special case. | |
259 if (IsSelectElement(control_element)) { | |
260 number_of_other_input_fields++; | |
261 } else { | |
262 const WebInputElement* input_element = | |
263 toWebInputElement(&control_element); | |
264 if (!input_element) | |
265 continue; | |
266 | |
267 if (input_element->isTextField()) { | |
268 if (input_element->isPasswordField()) { | |
269 ++number_of_password_input_fields; | |
270 passwords.push_back(*input_element); | |
271 } else { | |
272 ++number_of_text_input_fields; | |
273 found_captcha = found_captcha || IsCaptchaInput(*input_element); | |
274 } | |
275 } else { // Non-text fields. | |
276 if (input_element->isCheckbox()) | |
277 ++number_of_checkbox_input_fields; | |
278 else if (!IsSubmitElement(*input_element)) | |
279 ++number_of_other_input_fields; | |
280 } | |
281 } | |
282 } | |
283 | |
284 if (number_of_password_input_fields == 0 || | |
285 number_of_password_input_fields > 3) | |
286 return false; | |
287 | |
288 if ((number_of_text_input_fields - found_captcha >= | |
289 MINIMAL_NUMBER_OF_TEXT_FIELDS || | |
290 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS || | |
291 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS || | |
292 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) || | |
293 (found_signup_text_features && !found_signin_text_features)) { | |
294 WebInputElement password_creation_field; | |
295 | |
296 // TODO(crbug.com/618309): Improve local classifier to distinguish password | |
297 // creation and password usage fields on the change password forms. | |
298 if (passwords.size() == 3) | |
299 password_creation_field = passwords[1]; | |
300 else | |
301 password_creation_field = passwords[0]; | |
302 | |
303 *generation_field = password_creation_field.nameForAutofill(); | |
304 return true; | |
305 } | |
306 return false; | |
307 } | |
308 } | |
OLD | NEW |