Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(171)

Side by Side Diff: components/autofill/content/renderer/form_classifier.cc

Issue 1883183002: [Password Manager] HTML parsing based client-side form type classifier (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Check <form>'s attributes Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/content/renderer/form_classifier.h"
6
7 #include <algorithm>
8
9 #include "base/macros.h"
10 #include "base/strings/string16.h"
11 #include "base/strings/string_util.h"
12 #include "components/autofill/content/renderer/form_autofill_util.h"
13 #include "third_party/WebKit/public/platform/WebString.h"
14 #include "third_party/WebKit/public/platform/WebVector.h"
15 #include "third_party/WebKit/public/web/WebFormControlElement.h"
16 #include "third_party/WebKit/public/web/WebInputElement.h"
17
18 using autofill::form_util::WebFormControlElementToFormField;
19 using blink::WebFormControlElement;
20 using blink::WebInputElement;
21 using blink::WebString;
22 using blink::WebVector;
23
24 namespace autofill {
25
26 namespace {
27
28 // The words that frequently appear in attribute values of signin forms.
29 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"};
30 constexpr size_t kNumberOfSigninFeatures = arraysize(kSigninTextFeatures);
31
32 // The words that frequently appear in attribute values of signup forms.
33 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"};
34 constexpr size_t kNumberOfSignupFeatures = arraysize(kSignupTextFeatures);
35
36 // The words that frequently appear in attribute values of captcha elements.
37 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"};
38 constexpr size_t kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures);
39
40 // Minimal number of input fields to classify form as signup or change password
41 // form. If at least one of the listed thresholds is reached or exceeded, the
42 // form is classified as a form where password generation should be available.
43 constexpr size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2;
44 constexpr size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2;
45 constexpr size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3;
46 constexpr size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2;
47
// Strips every '-' and '_' from |value| in place. All other characters are
// kept in their original relative order.
void ClearAttributeValue(std::string* value) {
  const auto is_separator = [](char c) { return c == '-' || c == '_'; };
  const auto first_removed =
      std::remove_if(value->begin(), value->end(), is_separator);
  value->erase(first_removed, value->end());
}
54
55 // Find |features| in |element|'s attribute values. Returns true if at least one
56 // text feature was found.
57 bool FindTextFeaturesForClass(const blink::WebElement& element,
58 const char* const features[],
59 size_t number_of_features) {
60 DCHECK(features);
61
62 for (unsigned i = 0; i < element.attributeCount(); ++i) {
63 std::string filtered_value =
64 base::ToLowerASCII(element.attributeValue(i).utf8());
65 ClearAttributeValue(&filtered_value);
66
67 if (filtered_value.empty())
68 continue;
69 for (size_t j = 0; j < number_of_features; ++j) {
70 if (filtered_value.find(features[j]) != std::string::npos)
71 return true;
72 }
73 }
74 return false;
75 }
76
77 // Returns true if at least one captcha feature was found in |element|'s
78 // attribute values.
79 bool IsCaptchaInput(const blink::WebInputElement& element) {
80 return FindTextFeaturesForClass(element, kCaptchaFeatures,
81 kNumberOfCaptchaFeatures);
82 }
83
84 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha
85 // text features. Returns true, if at least one occurrence was found.
86 bool FindCaptchaInImgElements(const blink::WebElement& form,
87 bool ingnore_invisible) {
88 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img"));
89
90 blink::WebElementCollection img_elements =
91 form.getElementsByHTMLTagName(kImageTag);
92 for (blink::WebElement element = img_elements.firstItem(); !element.isNull();
93 element = img_elements.nextItem()) {
94 if (ingnore_invisible && !form_util::IsWebNodeVisible(element))
95 continue;
96 if (FindTextFeaturesForClass(element, kCaptchaFeatures,
97 kNumberOfCaptchaFeatures)) {
98 return true;
99 }
100 }
101 return false;
102 }
103
104 // Finds signin and signup features in |element|'s attribute values. Sets to
105 // true |found_signin_text_features| or |found_signup_text_features| if
106 // appropriate features were found.
107 void FindTextFeaturesInElement(const blink::WebElement& element,
108 bool* found_signin_text_features,
109 bool* found_signup_text_features) {
110 DCHECK(found_signin_text_features);
111 DCHECK(found_signup_text_features);
112
113 if (!*found_signin_text_features) {
114 *found_signin_text_features = FindTextFeaturesForClass(
115 element, kSigninTextFeatures, kNumberOfSigninFeatures);
116 }
117 if (!*found_signup_text_features) {
118 *found_signup_text_features = FindTextFeaturesForClass(
119 element, kSignupTextFeatures, kNumberOfSignupFeatures);
120 }
121 }
122
123 // Returns true if |element| has type "button" or "image".
124 bool IsButtonOrImageElement(const WebFormControlElement& element) {
125 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button"));
126 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image"));
127
128 return element.formControlType() == kButton ||
129 element.formControlType() == kImage;
130 }
131
132 // Returns true if |element| has type "submit".
133 bool IsSubmitElement(const WebFormControlElement& element) {
134 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit"));
135
136 return element.formControlType() == kSubmit;
137 }
138
139 // Returns true if |element| has type "hidden";
140 bool IsHiddenElement(const WebFormControlElement& element) {
141 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
142
143 return element.formControlType() == kHidden;
144 }
145
146 // Returns true if |element| has type "select-multiple" or "select-one".
147 bool IsSelectElement(const WebFormControlElement& element) {
148 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
149 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple"));
150
151 return element.formControlType() == kSelectOne ||
152 element.formControlType() == kSelectMultiple;
153 }
154
155 // Return true if |form| contains at least one visible password element.
156 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) {
157 WebVector<WebFormControlElement> control_elements;
158 form.getFormControlElements(control_elements);
159 for (auto& control_element : control_elements) {
160 const WebInputElement* input_element = toWebInputElement(&control_element);
161 if (!input_element)
162 continue;
163 if (input_element->isPasswordField() &&
164 form_util::IsWebNodeVisible(*input_element)) {
165 return true;
166 }
167 }
168 return false;
169 }
170
171 } // namespace
172
173 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form,
174 base::string16* generation_field) {
175 DCHECK(generation_field);
176
177 if (form.isNull())
178 return false;
179
180 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form);
181
182 bool found_signin_text_features = false;
183 bool found_signup_text_features = false;
184 size_t number_of_text_input_fields = 0;
185 size_t number_of_password_input_fields = 0;
186 size_t number_of_checkbox_input_fields = 0;
187 size_t number_of_other_input_fields = 0;
188 bool found_captcha =
189 FindCaptchaInImgElements(form, ignore_invisible_elements);
190
191 FindTextFeaturesInElement(form, &found_signin_text_features,
192 &found_signup_text_features);
193
194 std::vector<WebInputElement> passwords;
195 WebVector<WebFormControlElement> control_elements;
196 form.getFormControlElements(control_elements);
197
198 for (const WebFormControlElement& control_element : control_elements) {
199 if (IsHiddenElement(control_element))
200 continue;
201 if (ignore_invisible_elements) {
202 if (!form_util::IsWebNodeVisible(control_element))
203 continue;
204 }
205
206 // If type="button" or "image", skip them, because it might be a link
207 // to another form.
208 if (IsButtonOrImageElement(control_element))
209 continue;
210
211 FindTextFeaturesInElement(control_element, &found_signin_text_features,
212 &found_signup_text_features);
213
214 // Since <select> is not WebInputElement, but WebSelectElement, process
215 // them as a special case.
216 if (IsSelectElement(control_element)) {
217 number_of_other_input_fields++;
218 } else {
219 const WebInputElement* input_element =
220 toWebInputElement(&control_element);
221 if (!input_element)
222 continue;
223
224 if (input_element->isTextField()) {
225 if (input_element->isPasswordField()) {
226 ++number_of_password_input_fields;
227 passwords.push_back(*input_element);
228 } else {
229 ++number_of_text_input_fields;
230 found_captcha = found_captcha || IsCaptchaInput(*input_element);
231 }
232 } else { // Non-text fields.
233 if (input_element->isCheckbox())
234 ++number_of_checkbox_input_fields;
235 else if (!IsSubmitElement(*input_element))
236 ++number_of_other_input_fields;
237 }
238 }
239 }
240
241 if (number_of_password_input_fields == 0 ||
242 number_of_password_input_fields > 3)
243 return false;
244
245 if ((number_of_text_input_fields - found_captcha >=
246 MINIMAL_NUMBER_OF_TEXT_FIELDS ||
247 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS ||
248 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS ||
249 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) ||
250 (found_signup_text_features && !found_signin_text_features)) {
251 WebInputElement password_creation_field;
252
253 // TODO(crbug.com/618309): Improve local classifier to distinguish password
254 // creation and password usage fields on the change password forms.
255 if (passwords.size() == 3)
256 password_creation_field = passwords[1];
257 else
258 password_creation_field = passwords[0];
259
260 *generation_field = password_creation_field.nameForAutofill();
261 return true;
262 }
263 return false;
264 }
265 }
OLDNEW
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698