Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(311)

Unified Diff: components/autofill/content/renderer/form_classifier.cc

Issue 1883183002: [Password Manager] HTML parsing based client-side form type classifier (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Changes addressed to reviewers' comments 2 Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/autofill/content/renderer/form_classifier.cc
diff --git a/components/autofill/content/renderer/form_classifier.cc b/components/autofill/content/renderer/form_classifier.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7eadb5920b8e34f1c107e3cc3d648013f8122c88
--- /dev/null
+++ b/components/autofill/content/renderer/form_classifier.cc
@@ -0,0 +1,308 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/autofill/content/renderer/form_classifier.h"
+
+#include <algorithm>
+
+#include "base/macros.h"
+#include "base/strings/string16.h"
+#include "base/strings/string_util.h"
+#include "components/autofill/content/renderer/form_autofill_util.h"
+#include "third_party/WebKit/public/platform/WebString.h"
+#include "third_party/WebKit/public/platform/WebVector.h"
+#include "third_party/WebKit/public/web/WebFormControlElement.h"
+#include "third_party/WebKit/public/web/WebInputElement.h"
+#include "third_party/re2/src/re2/re2.h"
+
+using autofill::form_util::WebFormControlElementToFormField;
+using blink::WebFormControlElement;
+using blink::WebInputElement;
+using blink::WebString;
+using blink::WebVector;
+
+namespace autofill {
+
+namespace {
+
+// The words that frequently appear in attribute values of signin forms.
+const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"};
+constexpr size_t kNumberOfSigninFeatures = arraysize(kSigninTextFeatures);
+
+// The words that frequently appear in attribute values of signup forms.
+const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"};
+constexpr size_t kNumberOfSignupFeatures = arraysize(kSignupTextFeatures);
+
+// The words that frequently appear in attribute values of captcha elements.
+const char* const kCaptchaFeatures[] = {"captcha", "security", "code"};
+constexpr size_t kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures);
+
+// Minimal number of input fields to classify form as signup or change password
+// form. If at least one of the listed thresholds is reached or exceeded, the
+// form is classified as a form where password generation should be available.
+constexpr size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2;
+constexpr size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2;
+constexpr size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3;
+constexpr size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2;
+
+// Removes some characters from attribute value.
+void ClearAttributeValue(std::string* value) {
+ value->erase(std::remove_if(value->begin(), value->end(),
+ [](char x) { return x == '-' || x == '_'; }),
+ value->end());
+}
+
+// Find |features| in |element|'s attribute values. Returns true if at least one
+// text feature was found.
+bool FindTextFeaturesForClass(const blink::WebElement& element,
+ const char* const features[],
+ size_t number_of_features) {
+ DCHECK(features);
+
+ for (unsigned i = 0; i < element.attributeCount(); ++i) {
+ std::string filtered_value =
+ base::ToLowerASCII(element.attributeValue(i).utf8());
+ ClearAttributeValue(&filtered_value);
+
+ if (filtered_value.empty())
+ continue;
+ for (size_t j = 0; j < number_of_features; ++j) {
+ if (filtered_value.find(features[j]) != std::string::npos)
+ return true;
+ }
+ }
+ return false;
+}
+
+// Returns true if at least one captcha feature was found in |element|'s
+// attribute values.
+bool IsCaptchaInput(const blink::WebInputElement& element) {
+ return FindTextFeaturesForClass(element, kCaptchaFeatures,
+ kNumberOfCaptchaFeatures);
+}
+
+// Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha
+// text features. Returns true, if at least one occurrence was found.
+bool FindCaptchaInImgElements(const blink::WebElement& form,
+ bool ingnore_invisible) {
+ CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img"));
+
+ blink::WebElementCollection img_elements =
+ form.getElementsByHTMLTagName(kImageTag);
+ for (blink::WebElement element = img_elements.firstItem(); !element.isNull();
+ element = img_elements.nextItem()) {
+ if (ingnore_invisible && !form_util::IsWebNodeVisible(element))
+ continue;
+ if (FindTextFeaturesForClass(element, kCaptchaFeatures,
+ kNumberOfCaptchaFeatures)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Finds signin and signup features in |element|'s attribute values. Sets to
+// true |found_signin_text_features| or |found_signup_text_features| if
+// appropriate features were found.
+void FindTextFeaturesInElement(const blink::WebElement& element,
+ bool* found_signin_text_features,
+ bool* found_signup_text_features) {
+ DCHECK(found_signin_text_features);
+ DCHECK(found_signup_text_features);
+
+ if (!*found_signin_text_features) {
+ *found_signin_text_features = FindTextFeaturesForClass(
+ element, kSigninTextFeatures, kNumberOfSigninFeatures);
+ }
+ if (!*found_signup_text_features) {
+ *found_signup_text_features = FindTextFeaturesForClass(
+ element, kSignupTextFeatures, kNumberOfSignupFeatures);
+ }
+}
+
+// Returns true if |element| has type "button" or "image".
+bool IsButtonOrImageElement(const WebFormControlElement& element) {
+ CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button"));
+ CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image"));
+
+ return element.formControlType() == kButton ||
+ element.formControlType() == kImage;
+}
+
+// Returns true if |element| has type "submit".
+bool IsSubmitElement(const WebFormControlElement& element) {
+ CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit"));
+
+ return element.formControlType() == kSubmit;
+}
+
+// Returns true if |element| has type "hidden";
+bool IsHiddenElement(const WebFormControlElement& element) {
+ CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
+
+ return element.formControlType() == kHidden;
+}
+
+// Returns true if |element| has type "select-multiple" or "select-one".
+bool IsSelectElement(const WebFormControlElement& element) {
+ CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
+ CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple"));
+
+ return element.formControlType() == kSelectOne ||
+ element.formControlType() == kSelectMultiple;
+}
+
+// Return true if |form| contains at least one visible password element.
+bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) {
+ WebVector<WebFormControlElement> control_elements;
+ form.getFormControlElements(control_elements);
+ for (auto& control_element : control_elements) {
+ const WebInputElement* input_element = toWebInputElement(&control_element);
+ if (!input_element)
+ continue;
+ if (input_element->isPasswordField() &&
+ form_util::IsWebNodeVisible(*input_element)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Finds text features in |form| (only <form>'s attribute values, but not its
+// descendants) and its ancestors (only attribute values of the parental nodes,
+// but not the whole tree of the ancestors).
+// Sets |found_signin_text_features| and |found_signup_text_features| to true,
+// if corresponding features are found.
+void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form,
+ bool* found_signin_text_features,
+ bool* found_signup_text_features) {
+ CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input"));
+
+ DCHECK(found_signin_text_features);
+ DCHECK(found_signup_text_features);
+
+ unsigned number_of_inputs = form.getElementsByHTMLTagName(kInput).length();
+ blink::WebNode parent = form;
+ for (; !parent.isNull();) {
+ if (parent.isElementNode()) {
+ blink::WebElement element = parent.toConst<blink::WebElement>();
+ bool has_signin_feature = FindTextFeaturesForClass(
+ element, kSigninTextFeatures, kNumberOfSigninFeatures);
+ bool has_signup_feature = FindTextFeaturesForClass(
+ element, kSignupTextFeatures, kNumberOfSignupFeatures);
+ if (has_signin_feature && has_signup_feature)
+ break;
+ // If this ancestor contains more input elements, the ancestor might
+ // include another form. So, ignore the feature.
+ if (parent.getElementsByHTMLTagName(kInput).length() > number_of_inputs)
+ break;
+ if (has_signin_feature) {
+ *found_signin_text_features = true;
+ break;
+ }
+ if (has_signup_feature) {
+ *found_signup_text_features = true;
+ break;
+ }
+ }
+ parent = parent.parentNode();
+ }
+}
+
+} // namespace
+
+bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form,
+ base::string16* generation_field) {
+ DCHECK(generation_field);
+
+ if (form.isNull())
+ return false;
+
+ bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form);
+
+ bool found_signin_text_features = false;
+ bool found_signup_text_features = false;
+ size_t number_of_text_input_fields = 0;
+ size_t number_of_password_input_fields = 0;
+ size_t number_of_checkbox_input_fields = 0;
+ size_t number_of_other_input_fields = 0;
+ bool found_captcha =
+ FindCaptchaInImgElements(form, ignore_invisible_elements);
+
+ FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features,
+ &found_signup_text_features);
+
+ std::vector<WebInputElement> passwords;
+ WebVector<WebFormControlElement> control_elements;
+ form.getFormControlElements(control_elements);
+
+ for (const WebFormControlElement& control_element : control_elements) {
+ if (IsHiddenElement(control_element))
+ continue;
+ if (ignore_invisible_elements) {
+ bool element_is_invisible = !form_util::IsWebNodeVisible(control_element);
vabr (Chromium) 2016/06/13 16:01:57 nit: Shorter form is: if (!form_util::IsWebNodeVis
kolos1 2016/06/15 14:15:15 Done.
+ if (element_is_invisible)
+ continue;
+ }
+
+ // If type="button" or "image", skip them, because it might be a link
+ // to another form.
+ if (IsButtonOrImageElement(control_element))
+ continue;
+
+ FindTextFeaturesInElement(control_element, &found_signin_text_features,
+ &found_signup_text_features);
+
+ // Since <select> is not WebInputElement, but WebSelectElement, process
+ // them as a special case.
+ if (IsSelectElement(control_element)) {
+ number_of_other_input_fields++;
+ } else {
+ const WebInputElement* input_element =
+ toWebInputElement(&control_element);
+ if (!input_element)
+ continue;
+
+ if (input_element->isTextField()) {
+ if (input_element->isPasswordField()) {
+ ++number_of_password_input_fields;
+ passwords.push_back(*input_element);
+ } else {
+ ++number_of_text_input_fields;
+ found_captcha = found_captcha || IsCaptchaInput(*input_element);
+ }
+ } else { // Non-text fields.
+ if (input_element->isCheckbox())
+ ++number_of_checkbox_input_fields;
+ else if (!IsSubmitElement(*input_element))
+ ++number_of_other_input_fields;
+ }
+ }
+ }
+
+ if (number_of_password_input_fields == 0 ||
+ number_of_password_input_fields > 3)
+ return false;
+
+ if ((number_of_text_input_fields - found_captcha >=
+ MINIMAL_NUMBER_OF_TEXT_FIELDS ||
+ number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS ||
+ number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS ||
+ number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) ||
+ (found_signup_text_features && !found_signin_text_features)) {
+ WebInputElement password_creation_field;
+
+ // TODO(crbug.com/618309): Improve local classifier to distinguish password
+ // creation and password usage fields on the change password forms.
+ if (passwords.size() == 3)
+ password_creation_field = passwords[1];
+ else
+ password_creation_field = passwords[0];
+
+ *generation_field = password_creation_field.nameForAutofill();
+ return true;
+ }
+ return false;
+}
+}
« no previous file with comments | « components/autofill/content/renderer/form_classifier.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698