Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(184)

Unified Diff: components/autofill/core/browser/form_structure.cc

Issue 1622073002: [Autofill] Remove longest common prefix from field names when running heuristics. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: std::move Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/autofill/core/browser/form_structure.cc
diff --git a/components/autofill/core/browser/form_structure.cc b/components/autofill/core/browser/form_structure.cc
index 73aeb1a6b95ca779ba62df604c7afb57efd9d10f..feb1ddc7dbb1755bb0a175bc3a3ce69c7afa1b31 100644
--- a/components/autofill/core/browser/form_structure.cc
+++ b/components/autofill/core/browser/form_structure.cc
@@ -6,6 +6,7 @@
#include <stdint.h>
+#include <algorithm>
#include <map>
#include <utility>
@@ -48,6 +49,14 @@ const char kIgnorePatternInFieldName[] = "\\d{5,}";
// mismatches exceeds this threshold.
const int kNumberOfMismatchesThreshold = 3;
+// Common name prefixes are only removed when a form has at least
+// kCommonNamePrefixRemovalFieldThreshold fields and the shared prefix is at
+// least kMinCommonNamePrefixLength characters long. The values avoid stripping
+// in cases such as two fields named "address1" and "address2", while catching
+// framework prefixes such as "ctl01$ctl00$MainContentRegion$" on all fields.
+const int kCommonNamePrefixRemovalFieldThreshold = 3;
+const int kMinCommonNamePrefixLength = 10;
+
// Maximum number of characters in the field label to be encoded in a proto.
const int kMaxFieldLabelNumChars = 200;
@@ -319,6 +328,9 @@ FormStructure::FormStructure(const FormData& form)
base::SizeTToString16(++unique_names[field.name]);
fields_.push_back(new AutofillField(field, unique_name));
}
+
+ // Do further processing on the fields, as needed.
+ ProcessExtractedFields();
}
FormStructure::~FormStructure() {}
@@ -1241,4 +1253,47 @@ bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
return field.is_checkable;
}
+// Post-processes the fields extracted from the form. When the form has at
+// least kCommonNamePrefixRemovalFieldThreshold fields and all field names share
+// a prefix of at least kMinCommonNamePrefixLength characters, each field's
+// parseable name is set to its name with that prefix removed.
+void FormStructure::ProcessExtractedFields() {
+  // Update the field name parsed by heuristics if several criteria are met.
+  // Several fields must be present in the form.
+  if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
+    return;
+
+  // Find the longest common prefix within all the field names.
+  std::vector<base::string16> names;
+  names.reserve(field_count());
+  for (const AutofillField* field : *this)
+    names.push_back(field->name);
+
+  // Only the prefix length is needed from here on, so keep the length rather
+  // than a view into the local |names| vector.
+  const size_t prefix_length = FindLongestCommonPrefix(names).size();
+  if (prefix_length < kMinCommonNamePrefixLength)
+    return;
+
+  // The name without the prefix will be used for heuristics parsing. A field
+  // whose entire name is the common prefix (e.g. every field shares one name)
+  // is skipped: stripping would replace its parseable name with an empty
+  // string, so it keeps whatever parseable name it already has.
+  for (AutofillField* field : *this) {
+    if (field->name.size() > prefix_length)
+      field->set_parseable_name(field->name.substr(prefix_length));
+  }
+}
+
+// static
+// Returns the longest prefix shared by every entry of |strings|, or an empty
+// piece when |strings| is empty. The returned piece is built from the storage
+// of an element of |strings|, so |strings| must outlive any use of it.
+base::StringPiece16 FormStructure::FindLongestCommonPrefix(
+    const std::vector<base::string16>& strings) {
+  if (strings.empty())
+    return base::StringPiece16();
+
+  // Walk the first string one position at a time and compare that position
+  // against every other string; the first disagreement (or a string that is
+  // too short) ends the prefix. Adapted from http://goo.gl/YGukMM.
+  const base::string16& reference = strings.front();
+  for (size_t pos = 0; pos != reference.size(); ++pos) {
+    for (auto it = strings.begin() + 1; it != strings.end(); ++it) {
+      const bool still_matches =
+          pos < it->size() && (*it)[pos] == reference[pos];
+      if (!still_matches)
+        return base::StringPiece16(it->data(), pos);
+    }
+  }
+
+  // Every position of the first string matched: it is the common prefix.
+  return reference;
+}
+
} // namespace autofill
« no previous file with comments | « components/autofill/core/browser/form_structure.h ('k') | components/autofill/core/browser/form_structure_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698