Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "components/autofill/content/renderer/form_classifier.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 | |
| 9 #include "base/strings/string_util.h" | |
| 10 #include "components/autofill/content/renderer/form_autofill_util.h" | |
| 11 #include "third_party/WebKit/public/platform/WebString.h" | |
| 12 #include "third_party/WebKit/public/platform/WebVector.h" | |
| 13 #include "third_party/WebKit/public/web/WebFormControlElement.h" | |
| 14 #include "third_party/WebKit/public/web/WebInputElement.h" | |
| 15 #include "third_party/re2/src/re2/re2.h" | |
| 16 | |
| 17 using autofill::form_util::WebFormControlElementToFormField; | |
| 18 using blink::WebFormControlElement; | |
| 19 using blink::WebInputElement; | |
| 20 using blink::WebString; | |
| 21 using blink::WebVector; | |
| 22 | |
| 23 namespace autofill { | |
| 24 | |
| 25 namespace { | |
| 26 | |
| 27 // The words that frequently appear in attribute values of signin forms. | |
| 28 const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"}; | |
| 29 const int kNumberOfSigninFeatures = arraysize(kSigninTextFeatures); | |
| 30 | |
| 31 // The words that frequently appear in attribute values of signup forms. | |
| 32 const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"}; | |
| 33 const int kNumberOfSignupFeatures = arraysize(kSignupTextFeatures); | |
| 34 | |
| 35 // The words that frequently appear in attribute values of captcha elements. | |
| 36 const char* const kCaptchaFeatures[] = {"captcha", "security", "code"}; | |
| 37 const int kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures); | |
| 38 | |
| 39 // The characters that should be removed from attribute values. | |
| 40 const char kCharactersToBeRemoved[] = "-|_"; | |
| 41 | |
| 42 // Minimal number of input fields to detect signup/change password form. | |
| 43 const size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2; | |
| 44 const size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2; | |
| 45 const size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3; | |
| 46 const size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2; | |
| 47 | |
| 48 // Find |features| in |element|'s attribute values. Returns true if at least one | |
| 49 // text feature was found. | |
| 50 bool FindTextFeaturesForClass(const blink::WebElement& element, | |
| 51 const char* const features[], | |
| 52 size_t number_of_features) { | |
| 53 DCHECK(features); | |
| 54 | |
| 55 for (unsigned i = 0; i < element.attributeCount(); ++i) { | |
| 56 std::string filtered_value = | |
| 57 base::ToLowerASCII(element.attributeValue(i).utf8()); | |
| 58 RE2::GlobalReplace(&filtered_value, kCharactersToBeRemoved, ""); | |
|
vabr (Chromium)
2016/06/10 13:22:12
I'm afraid both regexps and the substring replacem
dvadym
2016/06/10 14:12:09
I wouldn't mind returning to erase-remove, but I'd
vabr (Chromium)
2016/06/10 14:32:14
Parsing is indeed fast, but building/compiling the
dvadym
2016/06/10 15:05:16
Sure regexp parsing is slow, that exactly what I m
kolos1
2016/06/13 14:27:34
Replaced with erase/remove_if solution.
| |
| 59 | |
| 60 if (filtered_value.empty()) | |
| 61 continue; | |
| 62 for (size_t j = 0; j < number_of_features; j++) { | |
| 63 if (filtered_value.find(features[j]) != std::string::npos) | |
| 64 return true; | |
| 65 } | |
| 66 } | |
| 67 return false; | |
| 68 } | |
| 69 | |
| 70 // Returns true if at least one captcha feature was found in |element|'s | |
| 71 // attribute values. | |
| 72 bool IsCaptchaInput(const blink::WebInputElement& element) { | |
| 73 return FindTextFeaturesForClass(element, kCaptchaFeatures, | |
| 74 kNumberOfCaptchaFeatures); | |
| 75 } | |
| 76 | |
| 77 // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha | |
| 78 // text features. Returns true, if at least one occurrence was found. | |
| 79 bool FindCaptchaInImgElements(const blink::WebElement& form, | |
| 80 bool ingnore_invisible) { | |
| 81 CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img")); | |
| 82 | |
| 83 blink::WebElementCollection img_elements = | |
| 84 form.getElementsByHTMLTagName(kImageTag); | |
| 85 for (blink::WebElement element = img_elements.firstItem(); !element.isNull(); | |
| 86 element = img_elements.nextItem()) { | |
| 87 if (ingnore_invisible && !form_util::IsWebNodeVisible(element)) | |
| 88 continue; | |
| 89 if (FindTextFeaturesForClass(element, kCaptchaFeatures, | |
| 90 kNumberOfCaptchaFeatures)) | |
| 91 return true; | |
| 92 } | |
| 93 return false; | |
| 94 } | |
| 95 | |
| 96 // Finds signin and signup features in |element|'s attribute values. Sets to | |
| 97 // true |found_signin_text_features| or |found_signup_text_features| if | |
| 98 // appropriate features were found. | |
| 99 void FindTextFeaturesInElement(const blink::WebElement& element, | |
| 100 bool* found_signin_text_features, | |
| 101 bool* found_signup_text_features) { | |
| 102 DCHECK(found_signin_text_features); | |
| 103 DCHECK(found_signup_text_features); | |
| 104 | |
| 105 if (!found_signin_text_features) { | |
| 106 *found_signin_text_features = FindTextFeaturesForClass( | |
| 107 element, kSigninTextFeatures, kNumberOfSigninFeatures); | |
| 108 } | |
| 109 if (!found_signup_text_features) { | |
| 110 *found_signup_text_features = FindTextFeaturesForClass( | |
| 111 element, kSignupTextFeatures, kNumberOfSignupFeatures); | |
| 112 } | |
| 113 } | |
| 114 | |
| 115 // Returns true if |element| has type "button" or "image". | |
| 116 bool IsButtonOrImageElement(const WebFormControlElement& element) { | |
| 117 CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button")); | |
| 118 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image")); | |
| 119 | |
| 120 return element.formControlType() == kButton || | |
| 121 element.formControlType() == kImage; | |
| 122 } | |
| 123 | |
| 124 // Returns true if |element| has type "submit". | |
| 125 bool IsSubmitElement(const WebFormControlElement& element) { | |
| 126 CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit")); | |
| 127 | |
| 128 return element.formControlType() == kSubmit; | |
| 129 } | |
| 130 | |
| 131 // Returns true if |element| has type "hidden"; | |
| 132 bool IsHiddenElement(const WebFormControlElement& element) { | |
| 133 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); | |
| 134 | |
| 135 return element.formControlType() == kHidden; | |
| 136 } | |
| 137 | |
| 138 // Returns true if |element| has type "select-multiple" or "select-one". | |
| 139 bool IsSelectElement(const WebFormControlElement& element) { | |
| 140 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); | |
| 141 CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple")); | |
| 142 | |
| 143 return element.formControlType() == kSelectOne || | |
| 144 element.formControlType() == kSelectMultiple; | |
| 145 } | |
| 146 | |
| 147 // Return true if |form| contains at least one visible password element. | |
| 148 bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) { | |
| 149 WebVector<WebFormControlElement> control_elements; | |
| 150 form.getFormControlElements(control_elements); | |
| 151 for (auto& control_element : control_elements) { | |
| 152 const WebInputElement* input_element = toWebInputElement(&control_element); | |
| 153 if (!input_element) | |
| 154 continue; | |
| 155 if (input_element->isPasswordField() && | |
| 156 form_util::IsWebNodeVisible(*input_element)) | |
| 157 return true; | |
| 158 } | |
| 159 return false; | |
| 160 } | |
| 161 | |
| 162 // Finds text features in <form> tag of |form| and its ancestors. | |
| 163 // Sets |found_signin_text_features| and |found_signup_text_features| to true, | |
| 164 // if corresponding features are found. | |
| 165 void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form, | |
| 166 bool* found_signin_text_features, | |
| 167 bool* found_signup_text_features) { | |
| 168 CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input")); | |
| 169 | |
| 170 DCHECK(found_signin_text_features); | |
| 171 DCHECK(found_signup_text_features); | |
| 172 | |
| 173 unsigned number_of_inputs = form.getElementsByHTMLTagName(kInput).length(); | |
| 174 blink::WebNode parent = form; | |
| 175 for (; !parent.isNull();) { | |
| 176 if (parent.getElementsByHTMLTagName(kInput).length() > number_of_inputs) | |
| 177 break; | |
| 178 if (parent.isElementNode()) { | |
| 179 blink::WebElement element = parent.toConst<blink::WebElement>(); | |
| 180 bool has_signin_feature = FindTextFeaturesForClass( | |
| 181 element, kSigninTextFeatures, kNumberOfSigninFeatures); | |
| 182 bool has_signup_feature = FindTextFeaturesForClass( | |
| 183 element, kSignupTextFeatures, kNumberOfSignupFeatures); | |
| 184 if (has_signin_feature && has_signup_feature) | |
| 185 break; | |
| 186 if (has_signin_feature) { | |
| 187 *found_signin_text_features = true; | |
| 188 break; | |
| 189 } | |
| 190 if (has_signup_feature) { | |
| 191 *found_signup_text_features = true; | |
| 192 break; | |
| 193 } | |
| 194 } | |
| 195 parent = parent.parentNode(); | |
| 196 } | |
| 197 } | |
| 198 | |
| 199 } // namespace | |
| 200 | |
| 201 bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form, | |
| 202 base::string16* generation_field) { | |
| 203 DCHECK(generation_field); | |
| 204 | |
| 205 if (form.isNull()) | |
| 206 return false; | |
| 207 | |
| 208 bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form); | |
| 209 | |
| 210 bool found_signin_text_features = false; | |
| 211 bool found_signup_text_features = false; | |
| 212 size_t number_of_text_input_fields = 0; | |
| 213 size_t number_of_password_input_fields = 0; | |
| 214 size_t number_of_checkbox_input_fields = 0; | |
| 215 size_t number_of_other_input_fields = 0; | |
| 216 bool found_captcha = | |
| 217 FindCaptchaInImgElements(form, ignore_invisible_elements); | |
| 218 | |
| 219 FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features, | |
| 220 &found_signup_text_features); | |
| 221 | |
| 222 std::vector<WebInputElement> passwords; | |
| 223 WebVector<WebFormControlElement> control_elements; | |
| 224 form.getFormControlElements(control_elements); | |
| 225 | |
| 226 for (size_t i = 0; i < control_elements.size(); ++i) { | |
| 227 WebFormControlElement control_element = control_elements[i]; | |
| 228 bool element_is_invisible = !form_util::IsWebNodeVisible(control_element); | |
| 229 if ((element_is_invisible && ignore_invisible_elements) || | |
| 230 IsHiddenElement(control_element)) | |
| 231 continue; | |
| 232 | |
| 233 // If type="button" or "image", skip them, because it might be a link | |
| 234 // to another form. | |
| 235 if (IsButtonOrImageElement(control_element)) | |
| 236 continue; | |
| 237 | |
| 238 FindTextFeaturesInElement(control_element, &found_signin_text_features, | |
| 239 &found_signup_text_features); | |
| 240 | |
| 241 // Since <select> is not WebInputElement, but WebSelectElement, process | |
| 242 // them as a special case. | |
| 243 if (IsSelectElement(control_element)) { | |
| 244 number_of_other_input_fields++; | |
| 245 } else { | |
| 246 WebInputElement* input_element = toWebInputElement(&control_element); | |
| 247 if (!input_element) | |
| 248 continue; | |
| 249 | |
| 250 if (input_element->isTextField()) { | |
| 251 if (input_element->isPasswordField()) { | |
| 252 number_of_password_input_fields++; | |
| 253 passwords.push_back(*input_element); | |
| 254 } else { | |
| 255 number_of_text_input_fields++; | |
| 256 found_captcha = found_captcha || IsCaptchaInput(*input_element); | |
| 257 } | |
| 258 } else { // Non-text fields. | |
| 259 if (input_element->isCheckbox()) | |
| 260 number_of_checkbox_input_fields++; | |
| 261 else if (!IsSubmitElement(*input_element)) | |
| 262 number_of_other_input_fields++; | |
| 263 } | |
| 264 } | |
| 265 } | |
| 266 | |
| 267 if (number_of_password_input_fields == 0 || | |
| 268 number_of_password_input_fields > 3) | |
| 269 return false; | |
| 270 | |
| 271 if ((number_of_text_input_fields - found_captcha >= | |
| 272 MINIMAL_NUMBER_OF_TEXT_FIELDS || | |
| 273 number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS || | |
| 274 number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS || | |
| 275 number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) || | |
| 276 (found_signup_text_features && !found_signin_text_features)) { | |
| 277 WebInputElement password_creation_field; | |
| 278 | |
| 279 // TODO(crbug.com/618309): Improve local classifier to distinguish password | |
| 280 // creation and password usage fields on the change password forms. | |
| 281 if (passwords.size() == 3) | |
| 282 password_creation_field = passwords[1]; | |
| 283 else | |
| 284 password_creation_field = passwords[0]; | |
| 285 | |
| 286 *generation_field = password_creation_field.nameForAutofill(); | |
| 287 return true; | |
| 288 } | |
| 289 return false; | |
| 290 } | |
| 291 } | |
| OLD | NEW |