OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/autofill/content/renderer/form_classifier.h" |
| 6 |
| 7 #include <algorithm> |
| 8 |
| 9 #include "base/macros.h" |
| 10 #include "base/strings/string16.h" |
| 11 #include "base/strings/string_util.h" |
| 12 #include "components/autofill/content/renderer/form_autofill_util.h" |
| 13 #include "third_party/WebKit/public/platform/WebString.h" |
| 14 #include "third_party/WebKit/public/platform/WebVector.h" |
| 15 #include "third_party/WebKit/public/web/WebFormControlElement.h" |
| 16 #include "third_party/WebKit/public/web/WebInputElement.h" |
| 17 |
| 18 using autofill::form_util::WebFormControlElementToFormField; |
| 19 using blink::WebFormControlElement; |
| 20 using blink::WebInputElement; |
| 21 using blink::WebString; |
| 22 using blink::WebVector; |
| 23 |
| 24 namespace autofill { |
| 25 |
| 26 namespace { |
| 27 |
| 28 // The words that frequently appear in attribute values of signin forms. |
| 29 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"}; |
| 30 constexpr size_t kNumberOfSigninFeatures = arraysize(kSigninTextFeatures); |
| 31 |
| 32 // The words that frequently appear in attribute values of signup forms. |
| 33 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"}; |
| 34 constexpr size_t kNumberOfSignupFeatures = arraysize(kSignupTextFeatures); |
| 35 |
| 36 // The words that frequently appear in attribute values of captcha elements. |
| 37 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"}; |
| 38 constexpr size_t kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures); |
| 39 |
// Per-category field-count thresholds used to classify a form as a signup or
// change password form. Reaching or exceeding any single threshold is enough
// for the form to be treated as one where password generation should be
// available.
constexpr size_t MINIMAL_NUMBER_OF_TEXT_FIELDS{2};
constexpr size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS{2};
constexpr size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS{3};
constexpr size_t MINIMAL_NUMBER_OF_OTHER_FIELDS{2};
| 47 |
// Strips every '-' and '_' character from |value| in place, so that e.g.
// "sign-up" and "sign_up" both become "signup".
void ClearAttributeValue(std::string* value) {
  const auto is_separator = [](char c) { return c == '-' || c == '_'; };
  const auto kept_end =
      std::remove_if(value->begin(), value->end(), is_separator);
  value->erase(kept_end, value->end());
}
| 54 |
| 55 // Find |features| in |element|'s attribute values. Returns true if at least one |
| 56 // text feature was found. |
| 57 bool FindTextFeaturesForClass(const blink::WebElement& element, |
| 58 const char* const features[], |
| 59 size_t number_of_features) { |
| 60 DCHECK(features); |
| 61 |
| 62 for (unsigned i = 0; i < element.attributeCount(); ++i) { |
| 63 std::string filtered_value = |
| 64 base::ToLowerASCII(element.attributeValue(i).utf8()); |
| 65 ClearAttributeValue(&filtered_value); |
| 66 |
| 67 if (filtered_value.empty()) |
| 68 continue; |
| 69 for (size_t j = 0; j < number_of_features; ++j) { |
| 70 if (filtered_value.find(features[j]) != std::string::npos) |
| 71 return true; |
| 72 } |
| 73 } |
| 74 return false; |
| 75 } |
| 76 |
| 77 // Returns true if at least one captcha feature was found in |element|'s |
| 78 // attribute values. |
| 79 bool IsCaptchaInput(const blink::WebInputElement& element) { |
| 80 return FindTextFeaturesForClass(element, kCaptchaFeatures, |
| 81 kNumberOfCaptchaFeatures); |
| 82 } |
| 83 |
| 84 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha |
| 85 // text features. Returns true, if at least one occurrence was found. |
| 86 bool FindCaptchaInImgElements(const blink::WebElement& form, |
| 87 bool ingnore_invisible) { |
| 88 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img")); |
| 89 |
| 90 blink::WebElementCollection img_elements = |
| 91 form.getElementsByHTMLTagName(kImageTag); |
| 92 for (blink::WebElement element = img_elements.firstItem(); !element.isNull(); |
| 93 element = img_elements.nextItem()) { |
| 94 if (ingnore_invisible && !form_util::IsWebNodeVisible(element)) |
| 95 continue; |
| 96 if (FindTextFeaturesForClass(element, kCaptchaFeatures, |
| 97 kNumberOfCaptchaFeatures)) { |
| 98 return true; |
| 99 } |
| 100 } |
| 101 return false; |
| 102 } |
| 103 |
| 104 // Finds signin and signup features in |element|'s attribute values. Sets to |
| 105 // true |found_signin_text_features| or |found_signup_text_features| if |
| 106 // appropriate features were found. |
| 107 void FindTextFeaturesInElement(const blink::WebElement& element, |
| 108 bool* found_signin_text_features, |
| 109 bool* found_signup_text_features) { |
| 110 DCHECK(found_signin_text_features); |
| 111 DCHECK(found_signup_text_features); |
| 112 |
| 113 if (!*found_signin_text_features) { |
| 114 *found_signin_text_features = FindTextFeaturesForClass( |
| 115 element, kSigninTextFeatures, kNumberOfSigninFeatures); |
| 116 } |
| 117 if (!*found_signup_text_features) { |
| 118 *found_signup_text_features = FindTextFeaturesForClass( |
| 119 element, kSignupTextFeatures, kNumberOfSignupFeatures); |
| 120 } |
| 121 } |
| 122 |
| 123 // Returns true if |element| has type "button" or "image". |
| 124 bool IsButtonOrImageElement(const WebFormControlElement& element) { |
| 125 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button")); |
| 126 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image")); |
| 127 |
| 128 return element.formControlType() == kButton || |
| 129 element.formControlType() == kImage; |
| 130 } |
| 131 |
| 132 // Returns true if |element| has type "submit". |
| 133 bool IsSubmitElement(const WebFormControlElement& element) { |
| 134 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit")); |
| 135 |
| 136 return element.formControlType() == kSubmit; |
| 137 } |
| 138 |
| 139 // Returns true if |element| has type "hidden"; |
| 140 bool IsHiddenElement(const WebFormControlElement& element) { |
| 141 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); |
| 142 |
| 143 return element.formControlType() == kHidden; |
| 144 } |
| 145 |
| 146 // Returns true if |element| has type "select-multiple" or "select-one". |
| 147 bool IsSelectElement(const WebFormControlElement& element) { |
| 148 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); |
| 149 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple")); |
| 150 |
| 151 return element.formControlType() == kSelectOne || |
| 152 element.formControlType() == kSelectMultiple; |
| 153 } |
| 154 |
| 155 // Return true if |form| contains at least one visible password element. |
| 156 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) { |
| 157 WebVector<WebFormControlElement> control_elements; |
| 158 form.getFormControlElements(control_elements); |
| 159 for (auto& control_element : control_elements) { |
| 160 const WebInputElement* input_element = toWebInputElement(&control_element); |
| 161 if (!input_element) |
| 162 continue; |
| 163 if (input_element->isPasswordField() && |
| 164 form_util::IsWebNodeVisible(*input_element)) { |
| 165 return true; |
| 166 } |
| 167 } |
| 168 return false; |
| 169 } |
| 170 |
| 171 } // namespace |
| 172 |
| 173 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form, |
| 174 base::string16* generation_field) { |
| 175 DCHECK(generation_field); |
| 176 |
| 177 if (form.isNull()) |
| 178 return false; |
| 179 |
| 180 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form); |
| 181 |
| 182 bool found_signin_text_features = false; |
| 183 bool found_signup_text_features = false; |
| 184 size_t number_of_text_input_fields = 0; |
| 185 size_t number_of_password_input_fields = 0; |
| 186 size_t number_of_checkbox_input_fields = 0; |
| 187 size_t number_of_other_input_fields = 0; |
| 188 bool found_captcha = |
| 189 FindCaptchaInImgElements(form, ignore_invisible_elements); |
| 190 |
| 191 FindTextFeaturesInElement(form, &found_signin_text_features, |
| 192 &found_signup_text_features); |
| 193 |
| 194 std::vector<WebInputElement> passwords; |
| 195 WebVector<WebFormControlElement> control_elements; |
| 196 form.getFormControlElements(control_elements); |
| 197 |
| 198 for (const WebFormControlElement& control_element : control_elements) { |
| 199 if (IsHiddenElement(control_element)) |
| 200 continue; |
| 201 if (ignore_invisible_elements) { |
| 202 if (!form_util::IsWebNodeVisible(control_element)) |
| 203 continue; |
| 204 } |
| 205 |
| 206 // If type="button" or "image", skip them, because it might be a link |
| 207 // to another form. |
| 208 if (IsButtonOrImageElement(control_element)) |
| 209 continue; |
| 210 |
| 211 FindTextFeaturesInElement(control_element, &found_signin_text_features, |
| 212 &found_signup_text_features); |
| 213 |
| 214 // Since <select> is not WebInputElement, but WebSelectElement, process |
| 215 // them as a special case. |
| 216 if (IsSelectElement(control_element)) { |
| 217 number_of_other_input_fields++; |
| 218 } else { |
| 219 const WebInputElement* input_element = |
| 220 toWebInputElement(&control_element); |
| 221 if (!input_element) |
| 222 continue; |
| 223 |
| 224 if (input_element->isTextField()) { |
| 225 if (input_element->isPasswordField()) { |
| 226 ++number_of_password_input_fields; |
| 227 passwords.push_back(*input_element); |
| 228 } else { |
| 229 ++number_of_text_input_fields; |
| 230 found_captcha = found_captcha || IsCaptchaInput(*input_element); |
| 231 } |
| 232 } else { // Non-text fields. |
| 233 if (input_element->isCheckbox()) |
| 234 ++number_of_checkbox_input_fields; |
| 235 else if (!IsSubmitElement(*input_element)) |
| 236 ++number_of_other_input_fields; |
| 237 } |
| 238 } |
| 239 } |
| 240 |
| 241 if (number_of_password_input_fields == 0 || |
| 242 number_of_password_input_fields > 3) |
| 243 return false; |
| 244 |
| 245 if ((number_of_text_input_fields - found_captcha >= |
| 246 MINIMAL_NUMBER_OF_TEXT_FIELDS || |
| 247 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS || |
| 248 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS || |
| 249 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) || |
| 250 (found_signup_text_features && !found_signin_text_features)) { |
| 251 WebInputElement password_creation_field; |
| 252 |
| 253 // TODO(crbug.com/618309): Improve local classifier to distinguish password |
| 254 // creation and password usage fields on the change password forms. |
| 255 if (passwords.size() == 3) |
| 256 password_creation_field = passwords[1]; |
| 257 else |
| 258 password_creation_field = passwords[0]; |
| 259 |
| 260 *generation_field = password_creation_field.nameForAutofill(); |
| 261 return true; |
| 262 } |
| 263 return false; |
| 264 } |
| 265 } |
OLD | NEW |