Index: components/autofill/core/browser/form_structure.cc |
diff --git a/components/autofill/core/browser/form_structure.cc b/components/autofill/core/browser/form_structure.cc |
index 73aeb1a6b95ca779ba62df604c7afb57efd9d10f..b2a4d6316ae2982bf970f78030010733c75426d7 100644 |
--- a/components/autofill/core/browser/form_structure.cc |
+++ b/components/autofill/core/browser/form_structure.cc |
@@ -48,6 +48,14 @@ const char kIgnorePatternInFieldName[] = "\\d{5,}"; |
// mismatches exceeds this threshold. |
const int kNumberOfMismatchesThreshold = 3; |
+// Only remove common name prefixes if we have a minimum number of fields and |
+// a minimum prefix length. These values are chosen to avoid cases such as two |
+// fields with "address1" and "address2" and be effective against web frameworks |
+// which prepend prefixes such as "ctl01$ctl00$MainContentRegion$" on all |
+// fields. Declared as size_t because they are compared against size values. |
+const size_t kCommonNamePrefixRemovalFieldThreshold = 3; |
+const size_t kMinCommonNamePrefixLength = 10; |
+ |
// Maximum number of characters in the field label to be encoded in a proto. |
const int kMaxFieldLabelNumChars = 200; |
@@ -319,6 +327,9 @@ FormStructure::FormStructure(const FormData& form) |
base::SizeTToString16(++unique_names[field.name]); |
fields_.push_back(new AutofillField(field, unique_name)); |
} |
+ |
+ // Do further processing on the fields, as needed. |
+ ProcessExtractedFields(); |
} |
FormStructure::~FormStructure() {} |
@@ -1241,4 +1252,43 @@ bool FormStructure::ShouldSkipField(const FormFieldData& field) const { |
return field.is_checkable; |
} |
+// Post-processes the fields collected by the constructor. When the form has at |
+// least kCommonNamePrefixRemovalFieldThreshold fields and all field names |
+// share a prefix of at least kMinCommonNamePrefixLength characters, each |
+// field's parseable name is set to its name with that prefix stripped. |
+// Otherwise the function returns without touching any field. |
+void FormStructure::ProcessExtractedFields() { |
+  // Update the field name parsed by heuristics if several criteria are met. |
+  // Several fields must be present in the form. |
+  if (field_count() < kCommonNamePrefixRemovalFieldThreshold) |
+    return; |
+ |
+  // Find the longest common prefix within all the field names. |
+  std::vector<base::string16> names; |
vabr (Chromium)
2016/01/25 10:10:25
optional: names.reserve(field_count());
Mathieu
2016/01/25 15:52:53
Done.
|
+  // One name is pushed per field, so size the vector once up front. |
+  names.reserve(field_count()); |
+  for (const AutofillField* field : *this) |
+    names.push_back(field->name); |
+ |
+  // A short shared prefix is not removed (see the constants' rationale). |
+  const base::string16 longest_prefix = FindLongestCommonPrefix(names); |
+  if (longest_prefix.size() < kMinCommonNamePrefixLength) |
+    return; |
+ |
+  // The name without the prefix will be used for heuristics parsing. |
+  for (AutofillField* field : *this) |
+    field->set_parseable_name(field->name.substr(longest_prefix.size())); |
+} |
+ |
+// Returns the longest string that is a prefix of every element of |strings|. |
+// Returns an empty string when |strings| is empty, and the single element |
+// itself when |strings| has exactly one element. |
+// static |
+base::string16 FormStructure::FindLongestCommonPrefix( |
+    const std::vector<base::string16>& strings) { |
+  // Without any strings there is no prefix; this also keeps strings[0] valid. |
+  if (strings.empty()) |
+    return base::string16(); |
+ |
+  // Go through each character of the first string until there is a mismatch at |
+  // the same position in any other string. Adapted from http://goo.gl/YGukMM. |
+  const base::string16& first = strings[0]; |
+  for (size_t prefix_len = 0; prefix_len < first.size(); ++prefix_len) { |
+    for (size_t i = 1; i < strings.size(); ++i) { |
+      // A string that ends here, or differs here, bounds the common prefix. |
+      if (prefix_len >= strings[i].size() || |
+          strings[i][prefix_len] != first[prefix_len]) { |
+        return first.substr(0, prefix_len); |
+      } |
+    } |
+  } |
+  // Every other string starts with the whole first string. |
+  return first; |
+} |
+ |
} // namespace autofill |