Index: components/autofill/content/renderer/form_classifier.cc |
diff --git a/components/autofill/content/renderer/form_classifier.cc b/components/autofill/content/renderer/form_classifier.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..e3d23f8bac90766c378638d282dcca0a7b91fa7e |
--- /dev/null |
+++ b/components/autofill/content/renderer/form_classifier.cc |
@@ -0,0 +1,287 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "components/autofill/content/renderer/form_classifier.h" |
+ |
+#include <algorithm> |
+ |
+#include "base/strings/string_util.h" |
vabr (Chromium)
2016/06/10 13:22:12
Please also #include "base/string16.h".
kolos1
2016/06/13 14:27:34
Done.
|
+#include "components/autofill/content/renderer/form_autofill_util.h" |
+#include "third_party/WebKit/public/platform/WebString.h" |
+#include "third_party/WebKit/public/platform/WebVector.h" |
+#include "third_party/WebKit/public/web/WebFormControlElement.h" |
+#include "third_party/WebKit/public/web/WebInputElement.h" |
+ |
+using autofill::form_util::WebFormControlElementToFormField; |
+using blink::WebFormControlElement; |
+using blink::WebInputElement; |
+using blink::WebString; |
+using blink::WebVector; |
+ |
+namespace autofill { |
+ |
+namespace { |
+ |
+// The words that frequently appear in attribute values of signin forms. |
+const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"}; |
+const int kNumberOfSigninFeatures = arraysize(kSigninTextFeatures); |
vabr (Chromium)
2016/06/10 13:22:11
Please #include "base/macros.h" for arraysize.
vabr (Chromium)
2016/06/10 13:22:12
Please use constexpr here and with the constants b
vabr (Chromium)
2016/06/10 13:22:12
Please use size_t instead of int. Unlike int, size
kolos1
2016/06/13 14:27:34
Done.
kolos1
2016/06/13 14:27:34
Done.
kolos1
2016/06/13 14:27:34
Done.
|
+ |
+// The words that frequently appear in attribute values of signup forms. |
+const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"}; |
+const int kNumberOfSignupFeatures = arraysize(kSignupTextFeatures); |
+ |
+// The words that frequently appear in attribute values of captcha elements. |
+const char* const kCaptchaFeatures[] = {"captcha", "security", "code"}; |
+const int kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures); |
+ |
+// The characters that should be removed from attribute values. |
+const char kCharactersToBeRemoved[] = {'-', '_'}; |
+ |
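+// Thresholds for classifying a form as a signup/change password form: a form |
+// with at least one password field is classified as such if the number of its |
+// fields of any one of the kinds below reaches the corresponding minimum. |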
vabr (Chromium)
2016/06/10 13:22:11
The comment does not make it clear how the minimal
kolos1
2016/06/13 14:27:34
Fixed the comment.
|
+const size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2; |
+const size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2; |
+const size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3; |
+const size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2; |
+ |
+// Erases every character equal to |c| from the string pointed to by |str|, |
+// keeping the remaining characters in their original order. |
+void RemoveAllOccurrencesOfCharacter(std::string* str, char c) { |
+  const auto first_removed = std::remove(str->begin(), str->end(), c); |
+  str->erase(first_removed, str->end()); |
+} |
+ |
+// Scans every attribute value of |element| for the given |features|. A value |
+// is lower-cased and stripped of |kCharactersToBeRemoved| before matching, and |
+// a feature matches when it occurs in the value as a substring. Returns true |
+// on the first match and false if no attribute value contains any feature. |
+bool FindTextFeaturesForClass(const blink::WebElement& element, |
+                              const char* const features[], |
+                              size_t number_of_features) { |
+  for (unsigned attribute = 0; attribute < element.attributeCount(); |
+       ++attribute) { |
+    std::string normalized = |
+        base::ToLowerASCII(element.attributeValue(attribute).utf8()); |
+    for (char unwanted : kCharactersToBeRemoved) |
dvadym
2016/06/09 12:40:59
Can we use regexp for removing symbols?
kolos1
2016/06/10 12:18:08
Done.
|
+      RemoveAllOccurrencesOfCharacter(&normalized, unwanted); |
+    // Nothing left to search in this attribute. |
+    if (normalized.empty()) |
+      continue; |
+    for (size_t feature = 0; feature < number_of_features; ++feature) { |
vabr (Chromium)
2016/06/10 13:22:12
nit: ++j
(Let's be consistent and use prefix incre
kolos1
2016/06/13 14:27:34
Done.
|
+      if (normalized.find(features[feature]) != std::string::npos) |
+        return true; |
+    } |
+  } |
+  return false; |
+} |
+ |
+// Tells whether any attribute value of the input |element| contains one of |
+// the captcha text features. |
+bool IsCaptchaInput(const blink::WebInputElement& element) { |
+  const bool has_captcha_feature = FindTextFeaturesForClass( |
+      element, kCaptchaFeatures, kNumberOfCaptchaFeatures); |
+  return has_captcha_feature; |
+} |
+ |
+// Looks through the <img> elements inside |form| for captcha text features in |
+// their attribute values. If |ignore_invisible| is true, images that are not |
+// visible are skipped. Returns true as soon as one matching image is found, |
+// false otherwise. |
+bool FindCaptchaInImgElements(const blink::WebElement& form, |
+                              bool ignore_invisible) { |
+  CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img")); |
+ |
+  blink::WebElementCollection images = |
+      form.getElementsByHTMLTagName(kImageTag); |
+  blink::WebElement image = images.firstItem(); |
+  while (!image.isNull()) { |
+    const bool skipped = |
+        ignore_invisible && !form_util::IsWebNodeVisible(image); |
+    if (!skipped && FindTextFeaturesForClass(image, kCaptchaFeatures, |
+                                             kNumberOfCaptchaFeatures)) { |
+      return true; |
+    } |
+    image = images.nextItem(); |
+  } |
+  return false; |
+} |
+ |
+// Looks for signin and signup text features in |element|'s attribute values |
+// and records the outcome in the two output flags. A flag that is already |
+// true is left untouched and the corresponding search is skipped, so the |
+// flags accumulate over repeated calls with different elements. Both |
+// |found_signin_text_features| and |found_signup_text_features| must be |
+// non-null. |
+void FindTextFeaturesInElement(bool* found_signin_text_features, |
dvadym
2016/06/09 12:40:59
Input arguments should be before output arguments
kolos1
2016/06/10 12:18:08
Done.
|
+                               bool* found_signup_text_features, |
dvadym
2016/06/09 12:40:59
Could you please add DCHECK for found_*_text_featu
kolos1
2016/06/10 12:18:08
Done.
|
+                               const blink::WebElement& element) { |
+  DCHECK(found_signin_text_features); |
+  DCHECK(found_signup_text_features); |
+ |
+  // Dereference before testing: it is the flag value, not the pointer, that |
+  // tells whether a search is still needed. Testing the pointer itself would |
+  // skip the search for every valid (non-null) argument. |
+  if (!*found_signin_text_features) { |
+    *found_signin_text_features = FindTextFeaturesForClass( |
+        element, kSigninTextFeatures, kNumberOfSigninFeatures); |
+  } |
+  if (!*found_signup_text_features) { |
+    *found_signup_text_features = FindTextFeaturesForClass( |
+        element, kSignupTextFeatures, kNumberOfSignupFeatures); |
+  } |
+} |
+ |
+// Tells whether the form control type of |element| is "button" or "image". |
+bool IsButtonOrImageElement(const WebFormControlElement& element) { |
+  CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button")); |
dvadym
2016/06/09 12:40:59
All input type names const are in InputTypeNames.h
kolos1
2016/06/10 12:18:08
I'm not sure we could use this file. As I understa
dvadym
2016/06/10 14:12:09
Acknowledged
|
+  CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image")); |
+ |
+  const WebString control_type = element.formControlType(); |
+  return control_type == kButton || control_type == kImage; |
+} |
+ |
+// Tells whether the form control type of |element| is "submit". |
+bool IsSubmitElement(const WebFormControlElement& element) { |
+  CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit")); |
+ |
+  const WebString control_type = element.formControlType(); |
+  return control_type == kSubmit; |
+} |
+ |
+// Tells whether the form control type of |element| is "hidden". |
+bool IsHiddenElement(const WebFormControlElement& element) { |
+  CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); |
+ |
+  const WebString control_type = element.formControlType(); |
+  return control_type == kHidden; |
+} |
+ |
+// Tells whether the form control type of |element| is one of the two <select> |
+// types, "select-one" or "select-multiple". |
+bool IsSelectElement(const WebFormControlElement& element) { |
+  CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); |
+  CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple")); |
+ |
+  const WebString control_type = element.formControlType(); |
+  return control_type == kSelectOne || control_type == kSelectMultiple; |
+} |
+ |
+// Tells whether at least one of the form control elements of |form| is a |
+// password field that is visible. |
+bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) { |
+  WebVector<WebFormControlElement> controls; |
+  form.getFormControlElements(controls); |
+  for (auto& control : controls) { |
+    // Null for controls that are not <input> elements. |
+    const WebInputElement* input = toWebInputElement(&control); |
+    const bool is_visible_password = |
+        input && input->isPasswordField() && |
+        form_util::IsWebNodeVisible(*input); |
+    if (is_visible_password) |
+      return true; |
+  } |
+  return false; |
+} |
+ |
+// Searches |form| and then its ancestors, nearest first, for signin and signup |
+// text features in their attribute values. The walk stops at the first node |
+// that contains more <input> elements than |form| does, or at the first |
+// element in which any feature is found. If that element has only signin |
+// features, |found_signin_text_features| is set to true; if it has only |
+// signup features, |found_signup_text_features| is set to true; if it has |
+// both, neither flag is changed. |
vabr (Chromium)
2016/06/10 13:22:12
nit: The comment sounds not completely correct. Wh
kolos1
2016/06/13 14:27:34
Fixed the comment.
|
+void FindTextFeaturesInFormAndItsAncestors(const blink::WebFormElement& form, |
dvadym
2016/06/09 12:40:59
The same comment as to FindTextFeaturesInElement a
kolos1
2016/06/10 12:18:08
Done.
|
+                                           bool* found_signin_text_features, |
+                                           bool* found_signup_text_features) { |
+  CR_DEFINE_STATIC_LOCAL(WebString, kInput, ("input")); |
+ |
+  const unsigned inputs_in_form = |
+      form.getElementsByHTMLTagName(kInput).length(); |
+  blink::WebNode node = form; |
+  while (!node.isNull()) { |
dvadym
2016/06/09 12:40:59
I'm concerning a little bit in performance of this
kolos1
2016/06/10 12:18:08
I will ask to review Webkit specialist. Thanks.
|
+    // A node holding more inputs than |form| also covers inputs outside it. |
+    if (node.getElementsByHTMLTagName(kInput).length() > inputs_in_form) |
vabr (Chromium)
2016/06/10 13:22:11
+1 to Vadym's worry about performance. This line i
dvadym
2016/06/10 14:12:09
Yeah, perfomance impact is not clear. Independentl
kolos1
2016/06/13 14:27:34
I reduced the number of calls "getElementsByHTMLTa
|
+      break; |
+    if (node.isElementNode()) { |
+      const blink::WebElement element = node.toConst<blink::WebElement>(); |
+      const bool has_signin = FindTextFeaturesForClass( |
+          element, kSigninTextFeatures, kNumberOfSigninFeatures); |
+      const bool has_signup = FindTextFeaturesForClass( |
+          element, kSignupTextFeatures, kNumberOfSignupFeatures); |
+      if (has_signin || has_signup) { |
+        // With both kinds present no flag is set (presumably because the |
+        // signal is ambiguous); the walk ends here in every case. |
+        if (has_signin != has_signup) { |
+          if (has_signin) |
+            *found_signin_text_features = true; |
+          else |
+            *found_signup_text_features = true; |
+        } |
+        break; |
+      } |
+    } |
+    node = node.parentNode(); |
+  } |
+} |
+ |
+} // namespace |
+ |
+// Decides whether |form| looks like a signup/change password form, using the |
+// numbers of its form control elements of different kinds and the signin, |
+// signup and captcha text features found in attribute values. If it does, |
+// stores the autofill name of the password field chosen for generation in |
+// |generation_field| and returns true. Otherwise returns false and leaves |
+// |generation_field| unchanged; this includes a null |form| and a form |
+// without any counted password field. |generation_field| must be non-null. |
+bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form, |
+                                        base::string16* generation_field) { |
+  DCHECK(generation_field); |
+ |
+  if (form.isNull()) |
+    return false; |
+ |
+  // Invisible elements are skipped only if the form has at least one visible |
+  // password field. |
+  bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form); |
+ |
+  bool found_signin_text_features = false; |
+  bool found_signup_text_features = false; |
+  size_t number_of_text_input_fields = 0; |
+  size_t number_of_password_input_fields = 0; |
+  size_t number_of_checkbox_input_fields = 0; |
+  size_t number_of_other_input_fields = 0; |
+  bool found_captcha = |
+      FindCaptchaInImgElements(form, ignore_invisible_elements); |
+ |
+  FindTextFeaturesInFormAndItsAncestors(form, &found_signin_text_features, |
+                                        &found_signup_text_features); |
+ |
+  std::vector<WebInputElement> passwords; |
+  WebVector<WebFormControlElement> control_elements; |
+  form.getFormControlElements(control_elements); |
+ |
+  for (const WebFormControlElement& control_element : control_elements) { |
vabr (Chromium)
2016/06/10 13:22:11
nit: Compress the first two line into:
for (const
kolos1
2016/06/13 14:27:34
Done.
|
+    // Do the cheap type check first, so that IsWebNodeVisible() is evaluated |
+    // only when its result can matter. |
+    if (IsHiddenElement(control_element)) |
+      continue; |
vabr (Chromium)
2016/06/10 13:22:12
IsWebNodeVisible might be expensive, please only c
kolos1
2016/06/13 14:27:34
Done.
|
+    if (ignore_invisible_elements && |
+        !form_util::IsWebNodeVisible(control_element)) { |
+      continue; |
+    } |
vabr (Chromium)
2016/06/10 13:22:11
nit: "if" statements with more than 2 lines in tot
kolos1
2016/06/13 14:27:34
Done.
|
+ |
+    // If type="button" or "image", skip them, because it might be a link |
+    // to another form. |
+    if (IsButtonOrImageElement(control_element)) |
+      continue; |
+ |
+    FindTextFeaturesInElement(&found_signin_text_features, |
+                              &found_signup_text_features, control_element); |
+ |
+    // Since <select> is not WebInputElement, but WebSelectElement, process |
+    // them as a special case. |
+    if (IsSelectElement(control_element)) { |
+      ++number_of_other_input_fields; |
vabr (Chromium)
2016/06/10 13:22:12
nit: Here and below: please use prefix ++ unless y
kolos1
2016/06/13 14:27:34
Done.
|
+      continue; |
+    } |
+ |
+    // Null for controls that are not <input> elements; those are not counted. |
+    const WebInputElement* input_element = |
+        toWebInputElement(&control_element); |
+    if (!input_element) |
+      continue; |
+ |
+    if (input_element->isTextField()) { |
+      if (input_element->isPasswordField()) { |
+        ++number_of_password_input_fields; |
+        passwords.push_back(*input_element); |
+      } else { |
+        ++number_of_text_input_fields; |
+        found_captcha = found_captcha || IsCaptchaInput(*input_element); |
+      } |
+    } else if (input_element->isCheckbox()) { |
+      ++number_of_checkbox_input_fields; |
+    } else if (!IsSubmitElement(*input_element)) { |
+      ++number_of_other_input_fields; |
+    } |
+  } |
+ |
+  if (number_of_password_input_fields == 0) |
+    return false; |
+ |
+  // One text field is discounted when a captcha was found. The decrement is |
+  // guarded because the counters are unsigned: a captcha image in a form |
+  // without text fields would otherwise make "0 - 1" wrap around to a huge |
+  // value and spuriously satisfy the text field threshold. |
+  size_t number_of_non_captcha_text_fields = number_of_text_input_fields; |
+  if (found_captcha && number_of_non_captcha_text_fields > 0) |
+    --number_of_non_captcha_text_fields; |
+ |
+  const bool has_enough_fields = |
+      number_of_non_captcha_text_fields >= MINIMAL_NUMBER_OF_TEXT_FIELDS || |
+      number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS || |
+      number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS || |
+      number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS; |
+  const bool has_only_signup_features = |
+      found_signup_text_features && !found_signin_text_features; |
+  if (!has_enough_fields && !has_only_signup_features) |
+    return false; |
+ |
+  // TODO(crbug.com/618309): Improve local classifier to distinguish password |
+  // creation and password usage fields on the change password forms. |
+  // With exactly three password fields the second one is used, otherwise the |
+  // first one. |passwords| is not empty here, see the early return above. |
+  const WebInputElement& password_creation_field = |
+      passwords[passwords.size() == 3 ? 1 : 0]; |
dvadym
2016/06/09 12:40:59
What's about case when passwords.size() > 3? It co
kolos1
2016/06/10 12:18:08
I saw one site where there were 4 password fields
|
+ |
+  *generation_field = password_creation_field.nameForAutofill(); |
+  return true; |
+} |
+} |