Index: components/autofill/core/browser/form_structure.cc |
diff --git a/components/autofill/core/browser/form_structure.cc b/components/autofill/core/browser/form_structure.cc |
index 73aeb1a6b95ca779ba62df604c7afb57efd9d10f..feb1ddc7dbb1755bb0a175bc3a3ce69c7afa1b31 100644 |
--- a/components/autofill/core/browser/form_structure.cc |
+++ b/components/autofill/core/browser/form_structure.cc |
@@ -6,6 +6,7 @@ |
#include <stdint.h> |
+#include <algorithm> |
#include <map> |
#include <utility> |
@@ -48,6 +49,14 @@ const char kIgnorePatternInFieldName[] = "\\d{5,}"; |
// mismatches exceeds this threshold. |
const int kNumberOfMismatchesThreshold = 3; |
+// Common name prefixes are only removed when the form has a minimum number of |
+// fields and the shared prefix has a minimum length. These values are chosen |
+// to avoid stripping in cases such as two fields named "address1" and |
+// "address2", while still being effective against web frameworks which |
+// prepend prefixes such as "ctl01$ctl00$MainContentRegion$" to all fields. |
+const int kCommonNamePrefixRemovalFieldThreshold = 3; |
+const int kMinCommonNamePrefixLength = 10; |
+ |
// Maximum number of characters in the field label to be encoded in a proto. |
const int kMaxFieldLabelNumChars = 200; |
@@ -319,6 +328,9 @@ FormStructure::FormStructure(const FormData& form) |
base::SizeTToString16(++unique_names[field.name]); |
fields_.push_back(new AutofillField(field, unique_name)); |
} |
+ |
+ // Do further processing on the fields, as needed. |
+ ProcessExtractedFields(); |
} |
FormStructure::~FormStructure() {} |
@@ -1241,4 +1253,47 @@ bool FormStructure::ShouldSkipField(const FormFieldData& field) const { |
return field.is_checkable; |
} |
+// Post-processes the fields collected by the constructor. When the form has |
+// at least kCommonNamePrefixRemovalFieldThreshold fields and every field name |
+// shares a prefix of at least kMinCommonNamePrefixLength characters, each |
+// field's name with that prefix removed is stored as its parseable name, |
+// which is what heuristics parsing is meant to look at. Otherwise no field is |
+// modified. |
+void FormStructure::ProcessExtractedFields() { |
+  // The thresholds are declared as int; convert them once so that the |
+  // comparisons below are made between size_t values. |
+  const size_t min_field_count = |
+      static_cast<size_t>(kCommonNamePrefixRemovalFieldThreshold); |
+  const size_t min_prefix_length = |
+      static_cast<size_t>(kMinCommonNamePrefixLength); |
+ |
+  // Several fields must be present in the form. |
+  if (field_count() < min_field_count) |
+    return; |
+ |
+  // Find the longest common prefix within all the field names. |
+  std::vector<base::string16> names; |
+  names.reserve(field_count()); |
+  for (const AutofillField* field : *this) |
+    names.push_back(field->name); |
+ |
+  // Only the length of the prefix is needed from here on. |
+  const size_t prefix_length = FindLongestCommonPrefix(names).size(); |
+  if (prefix_length < min_prefix_length) |
+    return; |
+ |
+  // The name without the prefix will be used for heuristics parsing. |
+  for (AutofillField* field : *this) { |
+    // A name consisting of nothing but the prefix would be reduced to an |
+    // empty string, which gives heuristics nothing to match against. Leave |
+    // such a field's parseable name as it already is (presumably the full |
+    // name; confirm against AutofillField's constructor). |
+    if (field->name.size() <= prefix_length) |
+      continue; |
+ |
+    field->set_parseable_name(field->name.substr(prefix_length)); |
+  } |
+} |
+ |
+// Returns the longest prefix shared by every entry of |strings|, or an empty |
+// piece when |strings| is empty. The returned piece refers to the character |
+// data of one of the entries of |strings|, so it must not be used after |
+// |strings| has been modified or destroyed. |
+// static |
+base::StringPiece16 FormStructure::FindLongestCommonPrefix( |
+    const std::vector<base::string16>& strings) { |
+  if (strings.empty()) |
+    return base::StringPiece16(); |
+ |
+  // Scan column by column, using the first string as the reference. The scan |
+  // stops at the first column where some other string is too short or holds |
+  // a different character. |
+  const base::string16& reference = strings[0]; |
+  for (size_t column = 0; column < reference.size(); ++column) { |
+    for (size_t row = 1; row < strings.size(); ++row) { |
+      const base::string16& candidate = strings[row]; |
+      const bool matches = column < candidate.size() && |
+                           candidate[column] == reference[column]; |
+      if (!matches) { |
+        // Everything before |column| is common to all strings. |
+        return base::StringPiece16(candidate.data(), column); |
+      } |
+    } |
+  } |
+ |
+  // Every other string starts with the whole first string. |
+  return reference; |
+} |
+ |
} // namespace autofill |