| Index: components/autofill/core/browser/form_structure.cc
|
| diff --git a/components/autofill/core/browser/form_structure.cc b/components/autofill/core/browser/form_structure.cc
|
| index 73aeb1a6b95ca779ba62df604c7afb57efd9d10f..feb1ddc7dbb1755bb0a175bc3a3ce69c7afa1b31 100644
|
| --- a/components/autofill/core/browser/form_structure.cc
|
| +++ b/components/autofill/core/browser/form_structure.cc
|
| @@ -6,6 +6,7 @@
|
|
|
| #include <stdint.h>
|
|
|
| +#include <algorithm>
|
| #include <map>
|
| #include <utility>
|
|
|
| @@ -48,6 +49,14 @@ const char kIgnorePatternInFieldName[] = "\\d{5,}";
|
| // mismatches exceeds this threshold.
|
| const int kNumberOfMismatchesThreshold = 3;
|
|
|
| +// Only remove common name prefixes if the form has a minimum number of fields
|
| +// and the prefix has a minimum length. These values are chosen to avoid cases
|
| +// such as two fields named "address1" and "address2", while still being
|
| +// effective against web frameworks which prepend prefixes such as
|
| +// "ctl01$ctl00$MainContentRegion$" to all fields.
|
| +const int kCommonNamePrefixRemovalFieldThreshold = 3;
|
| +const int kMinCommonNamePrefixLength = 10;
|
| +
|
| // Maximum number of characters in the field label to be encoded in a proto.
|
| const int kMaxFieldLabelNumChars = 200;
|
|
|
| @@ -319,6 +328,9 @@ FormStructure::FormStructure(const FormData& form)
|
| base::SizeTToString16(++unique_names[field.name]);
|
| fields_.push_back(new AutofillField(field, unique_name));
|
| }
|
| +
|
| + // Do further processing on the fields, as needed.
|
| + ProcessExtractedFields();
|
| }
|
|
|
| FormStructure::~FormStructure() {}
|
| @@ -1241,4 +1253,47 @@ bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
|
| return field.is_checkable;
|
| }
|
|
|
| +void FormStructure::ProcessExtractedFields() {
|
| +  // Strips a prefix shared by all field names (e.g. one prepended by a web
|
| +  // framework) from the name used for heuristics parsing. Nothing is changed
|
| +  // unless the form has enough fields and the shared prefix is long enough.
|
| +  if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
|
| +    return;
|
| +
|
| +  // Find the longest common prefix within all the field names.
|
| +  std::vector<base::string16> names;
|
| +  names.reserve(field_count());
|
| +  for (const AutofillField* field : *this)
|
| +    names.push_back(field->name);
|
| +
|
| +  // Only the length is needed. Keeping just the length avoids holding on to a
|
| +  // string piece that points into the local |names| vector.
|
| +  const size_t prefix_length = FindLongestCommonPrefix(names).size();
|
| +  if (prefix_length < kMinCommonNamePrefixLength)
|
| +    return;
|
| +
|
| +  // The name without the prefix will be used for heuristics parsing.
|
| +  for (AutofillField* field : *this) {
|
| +    // A field whose entire name is the common prefix would end up with an
|
| +    // empty parseable name, which gives the heuristics nothing to match on.
|
| +    // Leave such a field's parseable name untouched instead.
|
| +    if (field->name.size() <= prefix_length)
|
| +      continue;
|
| +
|
| +    field->set_parseable_name(field->name.substr(prefix_length));
|
| +  }
|
| +}
|
| +
|
| +// static
|
| +base::StringPiece16 FormStructure::FindLongestCommonPrefix(
|
| +    const std::vector<base::string16>& strings) {
|
| +  // Returns the longest prefix shared by every entry of |strings|, or an empty
|
| +  // piece when |strings| is empty. The result points into one of the entries,
|
| +  // so it must not be used after |strings| goes away.
|
| +  if (strings.empty())
|
| +    return base::StringPiece16();
|
| +
|
| +  // Compare each remaining string against the first one, remembering the
|
| +  // shortest match seen so far and the first string that produced it.
|
| +  const base::string16& reference = strings[0];
|
| +  size_t common_len = reference.size();
|
| +  size_t limiting_index = 0;
|
| +  for (size_t index = 1; index < strings.size(); ++index) {
|
| +    const base::string16& candidate = strings[index];
|
| +    size_t matched = 0;
|
| +    // Characters at or beyond |common_len| cannot shorten the prefix further.
|
| +    while (matched < common_len && matched < candidate.size() &&
|
| +           candidate[matched] == reference[matched]) {
|
| +      ++matched;
|
| +    }
|
| +    if (matched < common_len) {
|
| +      common_len = matched;
|
| +      limiting_index = index;
|
| +    }
|
| +  }
|
| +
|
| +  // No string cut the prefix short: the first string is itself the prefix.
|
| +  if (limiting_index == 0)
|
| +    return reference;
|
| +  return base::StringPiece16(strings[limiting_index].data(), common_len);
|
| +}
|
| +
|
| } // namespace autofill
|
|
|