Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Unified Diff: components/autofill/core/browser/form_structure.cc

Issue 1622073002: [Autofill] Remove longest common prefix from field names when running heuristics. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: tests Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/autofill/core/browser/form_structure.cc
diff --git a/components/autofill/core/browser/form_structure.cc b/components/autofill/core/browser/form_structure.cc
index 73aeb1a6b95ca779ba62df604c7afb57efd9d10f..b2a4d6316ae2982bf970f78030010733c75426d7 100644
--- a/components/autofill/core/browser/form_structure.cc
+++ b/components/autofill/core/browser/form_structure.cc
@@ -48,6 +48,14 @@ const char kIgnorePatternInFieldName[] = "\\d{5,}";
// mismatches exceeds this threshold.
const int kNumberOfMismatchesThreshold = 3;
+// Only remove common name prefixes when the form has a minimum number of
+// fields and the prefix has a minimum length. These values are chosen to avoid
+// cases such as two fields named "address1" and "address2", while still being
+// effective against web frameworks which prepend prefixes such as
+// "ctl01$ctl00$MainContentRegion$" to all fields.
+const int kCommonNamePrefixRemovalFieldThreshold = 3;
+const int kMinCommonNamePrefixLength = 10;
+
// Maximum number of characters in the field label to be encoded in a proto.
const int kMaxFieldLabelNumChars = 200;
@@ -319,6 +327,9 @@ FormStructure::FormStructure(const FormData& form)
base::SizeTToString16(++unique_names[field.name]);
fields_.push_back(new AutofillField(field, unique_name));
}
+
+ // Do further processing on the fields, as needed.
+ ProcessExtractedFields();
}
FormStructure::~FormStructure() {}
@@ -1241,4 +1252,43 @@ bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
return field.is_checkable;
}
+// Post-processes the extracted fields. If the form has at least
+// kCommonNamePrefixRemovalFieldThreshold fields and their names share a common
+// prefix of at least kMinCommonNamePrefixLength characters, sets each field's
+// parseable name to its name with that prefix removed. Otherwise does nothing.
+void FormStructure::ProcessExtractedFields() {
+  // Update the field name parsed by heuristics if several criteria are met.
+  // Several fields must be present in the form.
+  if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
+    return;
+
+  // Find the longest common prefix within all the field names.
+  std::vector<base::string16> names;
vabr (Chromium) 2016/01/25 10:10:25 optional: names.reserve(field_count());
Mathieu 2016/01/25 15:52:53 Done.
+  // One entry is appended per field, so size the vector once up front.
+  names.reserve(field_count());
+  for (const AutofillField* field : *this)
+    names.push_back(field->name);
+
+  const base::string16 longest_prefix = FindLongestCommonPrefix(names);
+  const size_t prefix_length = longest_prefix.size();
+  // The threshold is a positive int; cast it so the comparison is made between
+  // two unsigned values explicitly.
+  if (prefix_length < static_cast<size_t>(kMinCommonNamePrefixLength))
+    return;
+
+  // The name without the prefix will be used for heuristics parsing.
+  for (AutofillField* field : *this) {
+    // substr(pos) returns everything from |pos| to the end of the name.
+    field->set_parseable_name(field->name.substr(prefix_length));
+  }
+}
+
+// static
+// Returns the longest prefix shared by every string in |strings|. Returns an
+// empty string when |strings| is empty, and the string itself when |strings|
+// holds a single element.
+base::string16 FormStructure::FindLongestCommonPrefix(
+    const std::vector<base::string16>& strings) {
+  // Nothing to compare; also avoids reading strings[0] out of bounds.
+  if (strings.empty())
+    return base::string16();
+
+  // Go through each character of the first string until there is a mismatch at
+  // the same position in any other string. Adapted from http://goo.gl/YGukMM.
+  const base::string16& first = strings[0];
+  for (size_t prefix_len = 0; prefix_len < first.size(); ++prefix_len) {
+    for (size_t i = 1; i < strings.size(); ++i) {
+      // The length is checked before indexing, so operator[] is safe here.
+      if (prefix_len >= strings[i].size() ||
+          strings[i][prefix_len] != first[prefix_len]) {
+        // Mismatch found. All strings agree on the first |prefix_len|
+        // characters, so the prefix can be taken from the first string.
+        return first.substr(0, prefix_len);
+      }
+    }
+  }
+  // Every character of the first string matched in all other strings.
+  return first;
+}
+
} // namespace autofill

Powered by Google App Engine
This is Rietveld 408576698