Chromium Code Reviews| Index: components/autofill/content/renderer/form_classifier.cc |
| diff --git a/components/autofill/content/renderer/form_classifier.cc b/components/autofill/content/renderer/form_classifier.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..f457e0911e5068878adb5704a5ab7b58ec7e0870 |
| --- /dev/null |
| +++ b/components/autofill/content/renderer/form_classifier.cc |
| @@ -0,0 +1,291 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "components/autofill/content/renderer/form_classifier.h" |
| + |
| +#include <algorithm> |
| + |
| +#include "base/strings/string_util.h" |
| +#include "components/autofill/content/renderer/form_autofill_util.h" |
| +#include "third_party/WebKit/public/platform/WebString.h" |
| +#include "third_party/WebKit/public/platform/WebVector.h" |
| +#include "third_party/WebKit/public/web/WebFormControlElement.h" |
| +#include "third_party/WebKit/public/web/WebInputElement.h" |
| +#include "third_party/re2/src/re2/re2.h" |
| + |
| +using autofill::form_util::WebFormControlElementToFormField; |
| +using blink::WebFormControlElement; |
| +using blink::WebInputElement; |
| +using blink::WebString; |
| +using blink::WebVector; |
| + |
| +namespace autofill { |
| + |
| +namespace { |
| + |
| +// Lowercase substrings searched for in attribute values to spot signin forms. |
| +const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"}; |
| +const int kNumberOfSigninFeatures = arraysize(kSigninTextFeatures); |
| + |
| +// Lowercase substrings searched for in attribute values to spot signup forms. |
| +const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"}; |
| +const int kNumberOfSignupFeatures = arraysize(kSignupTextFeatures); |
| + |
| +// Lowercase substrings searched for in attribute values to spot captchas. |
| +const char* const kCaptchaFeatures[] = {"captcha", "security", "code"}; |
| +const int kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures); |
| + |
| +// RE2 pattern (alternation of '-' and '_') stripped from attribute values. |
| +const char kCharactersToBeRemoved[] = "-|_"; |
| + |
| +// Per-kind field-count thresholds; reaching any one marks a signup/change password form. |
| +const size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2; |
| +const size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2; |
| +const size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3; |
| +const size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2; |
| + |
| +// Looks for any of the |number_of_features| strings in |features| inside the |
| +// attribute values of |element|. Each value is lowercased (ASCII) and has the |
| +// characters matched by |kCharactersToBeRemoved| stripped before the substring |
| +// search. Returns true as soon as one feature is found, false otherwise. |
| +bool FindTextFeaturesForClass(const blink::WebElement& element, |
| +                              const char* const features[], |
| +                              size_t number_of_features) { |
| +  DCHECK(features); |
| + |
| +  // Build the RE2 object once instead of implicitly re-compiling the pattern |
| +  // for every attribute of every element. |
| +  // NOTE(review): assumes single-threaded use, like the other |
| +  // CR_DEFINE_STATIC_LOCAL statics in this file -- confirm. |
| +  CR_DEFINE_STATIC_LOCAL(RE2, kRemovalPattern, (kCharactersToBeRemoved)); |
| + |
| +  for (unsigned i = 0; i < element.attributeCount(); ++i) { |
| +    std::string filtered_value = |
| +        base::ToLowerASCII(element.attributeValue(i).utf8()); |
| +    RE2::GlobalReplace(&filtered_value, kRemovalPattern, ""); |

vabr (Chromium)
2016/06/10 13:22:12
I'm afraid both regexps and the substring replacem
dvadym
2016/06/10 14:12:09
I wouldn't mind returning to erase-remove, but I'd
vabr (Chromium)
2016/06/10 14:32:14
Parsing is indeed fast, but building/compiling the
dvadym
2016/06/10 15:05:16
Sure regexp parsing is slow, that exactly what I m
kolos1
2016/06/13 14:27:34
Replaced with erase/remove_if solution.
|
| + |
| +    // Nothing left to search once the separators have been stripped. |
| +    if (filtered_value.empty()) |
| +      continue; |
| +    for (size_t j = 0; j < number_of_features; j++) { |
| +      if (filtered_value.find(features[j]) != std::string::npos) |
| +        return true; |
| +    } |
| +  } |
| +  return false; |
| +} |
| + |
| +// Checks |element|'s attribute values against |kCaptchaFeatures| and reports |
| +// whether any captcha feature occurs in them. |
| +bool IsCaptchaInput(const blink::WebInputElement& element) { |
| +  const bool has_captcha_feature = FindTextFeaturesForClass( |
| +      element, kCaptchaFeatures, kNumberOfCaptchaFeatures); |
| +  return has_captcha_feature; |
| +} |
| + |
| +// Walks the <img> descendants of |form| and returns true for the first one |
| +// whose attribute values contain a captcha text feature. When |
| +// |ingnore_invisible| is set, images that are not visible are not examined. |
| +// Returns false if no such image exists. |
| +bool FindCaptchaInImgElements(const blink::WebElement& form, |
| +                              bool ingnore_invisible) { |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img")); |
| + |
| +  blink::WebElementCollection images = form.getElementsByHTMLTagName(kImageTag); |
| +  blink::WebElement image = images.firstItem(); |
| +  while (!image.isNull()) { |
| +    // Visibility is only queried when invisible images have to be skipped. |
| +    const bool skipped = |
| +        ingnore_invisible && !form_util::IsWebNodeVisible(image); |
| +    if (!skipped && FindTextFeaturesForClass(image, kCaptchaFeatures, |
| +                                             kNumberOfCaptchaFeatures)) { |
| +      return true; |
| +    } |
| +    image = images.nextItem(); |
| +  } |
| +  return false; |
| +} |
| + |
| +// Searches |element|'s attribute values for signin and signup text features. |
| +// Each out-flag is only ever switched from false to true: a flag that is |
| +// already true on entry is left alone and the corresponding search is skipped. |
| +// Both |found_signin_text_features| and |found_signup_text_features| must be |
| +// non-null. |
| +void FindTextFeaturesInElement(const blink::WebElement& element, |
| +                               bool* found_signin_text_features, |
| +                               bool* found_signup_text_features) { |
| +  DCHECK(found_signin_text_features); |
| +  DCHECK(found_signup_text_features); |
| + |
| +  // Test the pointed-to flags, not the pointers themselves: the pointers are |
| +  // never null here, so checking them would make both searches unreachable. |
| +  if (!*found_signin_text_features) { |
| +    *found_signin_text_features = FindTextFeaturesForClass( |
| +        element, kSigninTextFeatures, kNumberOfSigninFeatures); |
| +  } |
| +  if (!*found_signup_text_features) { |
| +    *found_signup_text_features = FindTextFeaturesForClass( |
| +        element, kSignupTextFeatures, kNumberOfSignupFeatures); |
| +  } |
| +} |
| + |
| +// Tells whether the form control type of |element| is "button" or "image". |
| +bool IsButtonOrImageElement(const WebFormControlElement& element) { |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kButtonType, ("button")); |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kImageType, ("image")); |
| + |
| +  if (element.formControlType() == kButtonType) |
| +    return true; |
| +  return element.formControlType() == kImageType; |
| +} |
| + |
| +// Tells whether the form control type of |element| is "submit". |
| +bool IsSubmitElement(const WebFormControlElement& element) { |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kSubmitType, ("submit")); |
| + |
| +  const bool is_submit = element.formControlType() == kSubmitType; |
| +  return is_submit; |
| +} |
| + |
| +// Tells whether the form control type of |element| is "hidden". |
| +bool IsHiddenElement(const WebFormControlElement& element) { |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kHiddenType, ("hidden")); |
| + |
| +  const bool is_hidden = element.formControlType() == kHiddenType; |
| +  return is_hidden; |
| +} |
| + |
| +// Tells whether the form control type of |element| is "select-one" or |
| +// "select-multiple". |
| +bool IsSelectElement(const WebFormControlElement& element) { |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kSingleSelectType, ("select-one")); |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kMultiSelectType, ("select-multiple")); |
| + |
| +  if (element.formControlType() == kSingleSelectType) |
| +    return true; |
| +  return element.formControlType() == kMultiSelectType; |
| +} |
| + |
| +// Scans the form control elements of |form| and reports whether any of them |
| +// is an input element that is both a password field and visible. |
| +bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) { |
| +  WebVector<WebFormControlElement> controls; |
| +  form.getFormControlElements(controls); |
| +  for (size_t index = 0; index < controls.size(); ++index) { |
| +    // Controls that are not <input> elements yield a null pointer here. |
| +    const WebInputElement* input = toWebInputElement(&controls[index]); |
| +    const bool is_visible_password = input && input->isPasswordField() && |
| +                                     form_util::IsWebNodeVisible(*input); |
| +    if (is_visible_password) |
| +      return true; |
| +  } |
| +  return false; |
| +} |
| + |
| +// Starting at |form| and climbing through its ancestors, inspects the |
| +// attribute values of each element node for signin/signup text features. |
| +// The climb ends at the first node that contains more <input> elements than |
| +// |form| itself, or at the first element showing any feature. If that element |
| +// shows exactly one kind of feature, the matching out-flag is set to true; if |
| +// it shows both kinds, neither flag is changed. |
| +void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form, |
| +                                           bool* found_signin_text_features, |
| +                                           bool* found_signup_text_features) { |
| +  CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input")); |
| + |
| +  DCHECK(found_signin_text_features); |
| +  DCHECK(found_signup_text_features); |
| + |
| +  const unsigned inputs_in_form = |
| +      form.getElementsByHTMLTagName(kInput).length(); |
| +  for (blink::WebNode node = form; !node.isNull(); node = node.parentNode()) { |
| +    // An ancestor holding extra inputs wraps more than just this form. |
| +    if (node.getElementsByHTMLTagName(kInput).length() > inputs_in_form) |
| +      break; |
| +    if (!node.isElementNode()) |
| +      continue; |
| + |
| +    blink::WebElement current = node.toConst<blink::WebElement>(); |
| +    const bool signin_seen = FindTextFeaturesForClass( |
| +        current, kSigninTextFeatures, kNumberOfSigninFeatures); |
| +    const bool signup_seen = FindTextFeaturesForClass( |
| +        current, kSignupTextFeatures, kNumberOfSignupFeatures); |
| +    if (!signin_seen && !signup_seen) |
| +      continue; |
| + |
| +    // Some feature was seen: record it unless both kinds are present, then |
| +    // stop climbing. |
| +    if (signin_seen && !signup_seen) |
| +      *found_signin_text_features = true; |
| +    else if (signup_seen && !signin_seen) |
| +      *found_signup_text_features = true; |
| +    break; |
| +  } |
| +} |
| + |
| +} // namespace |
| + |
| +// Decides whether |form| looks like a signup/change password form. On a |
| +// positive result, writes the autofill name of the password field picked for |
| +// generation to |generation_field| and returns true. Returns false, leaving |
| +// |generation_field| untouched, when |form| is null, has no or more than three |
| +// counted password fields, or meets none of the classification criteria. |
| +// |generation_field| must be non-null. |
| +bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form, |
| +                                        base::string16* generation_field) { |
| +  DCHECK(generation_field); |
| + |
| +  if (form.isNull()) |
| +    return false; |
| + |
| +  // Invisible elements are only skipped when the form has at least one visible |
| +  // password field. |
| +  bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form); |
| + |
| +  bool found_signin_text_features = false; |
| +  bool found_signup_text_features = false; |
| +  size_t number_of_text_input_fields = 0; |
| +  size_t number_of_password_input_fields = 0; |
| +  size_t number_of_checkbox_input_fields = 0; |
| +  size_t number_of_other_input_fields = 0; |
| +  bool found_captcha = |
| +      FindCaptchaInImgElements(form, ignore_invisible_elements); |
| + |
| +  FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features, |
| +                                        &found_signup_text_features); |
| + |
| +  std::vector<WebInputElement> passwords; |
| +  WebVector<WebFormControlElement> control_elements; |
| +  form.getFormControlElements(control_elements); |
| + |
| +  for (size_t i = 0; i < control_elements.size(); ++i) { |
| +    WebFormControlElement control_element = control_elements[i]; |
| +    bool element_is_invisible = !form_util::IsWebNodeVisible(control_element); |
| +    if ((element_is_invisible && ignore_invisible_elements) || |
| +        IsHiddenElement(control_element)) |
| +      continue; |
| + |
| +    // If type="button" or "image", skip them, because it might be a link |
| +    // to another form. |
| +    if (IsButtonOrImageElement(control_element)) |
| +      continue; |
| + |
| +    FindTextFeaturesInElement(control_element, &found_signin_text_features, |
| +                              &found_signup_text_features); |
| + |
| +    // Since <select> is not WebInputElement, but WebSelectElement, process |
| +    // them as a special case. |
| +    if (IsSelectElement(control_element)) { |
| +      number_of_other_input_fields++; |
| +    } else { |
| +      WebInputElement* input_element = toWebInputElement(&control_element); |
| +      if (!input_element) |
| +        continue; |
| + |
| +      if (input_element->isTextField()) { |
| +        if (input_element->isPasswordField()) { |
| +          number_of_password_input_fields++; |
| +          passwords.push_back(*input_element); |
| +        } else { |
| +          number_of_text_input_fields++; |
| +          found_captcha = found_captcha || IsCaptchaInput(*input_element); |
| +        } |
| +      } else {  // Non-text fields. |
| +        if (input_element->isCheckbox()) |
| +          number_of_checkbox_input_fields++; |
| +        else if (!IsSubmitElement(*input_element)) |
| +          number_of_other_input_fields++; |
| +      } |
| +    } |
| +  } |
| + |
| +  if (number_of_password_input_fields == 0 || |
| +      number_of_password_input_fields > 3) |
| +    return false; |
| + |
| +  // One text field is discounted when a captcha was found. The discount is |
| +  // added to the threshold instead of being subtracted from the unsigned |
| +  // count, so a captcha found with zero text inputs (e.g. only in an <img>) |
| +  // cannot wrap the count around and spuriously satisfy the check. |
| +  const size_t required_text_fields = |
| +      MINIMAL_NUMBER_OF_TEXT_FIELDS + (found_captcha ? 1 : 0); |
| + |
| +  if ((number_of_text_input_fields >= required_text_fields || |
| +       number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS || |
| +       number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS || |
| +       number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) || |
| +      (found_signup_text_features && !found_signin_text_features)) { |
| +    WebInputElement password_creation_field; |
| + |
| +    // TODO(crbug.com/618309): Improve local classifier to distinguish password |
| +    // creation and password usage fields on the change password forms. |
| +    // |passwords| holds between one and three elements at this point. |
| +    if (passwords.size() == 3) |
| +      password_creation_field = passwords[1]; |
| +    else |
| +      password_creation_field = passwords[0]; |
| + |
| +    *generation_field = password_creation_field.nameForAutofill(); |
| +    return true; |
| +  } |
| +  return false; |
| +} |
| +} |