Chromium Code Reviews| Index: components/autofill/core/browser/form_structure.cc |
| diff --git a/components/autofill/core/browser/form_structure.cc b/components/autofill/core/browser/form_structure.cc |
| index 73aeb1a6b95ca779ba62df604c7afb57efd9d10f..e7800b31527a1d611392abd88cf1963dbf1d3752 100644 |
| --- a/components/autofill/core/browser/form_structure.cc |
| +++ b/components/autofill/core/browser/form_structure.cc |
| @@ -6,6 +6,7 @@ |
| #include <stdint.h> |
| +#include <algorithm> |
| #include <map> |
| #include <utility> |
| @@ -48,6 +49,14 @@ const char kIgnorePatternInFieldName[] = "\\d{5,}"; |
| // mismatches exceeds this threshold. |
| const int kNumberOfMismatchesThreshold = 3; |
| +// Only remove common name prefixes when the form has a minimum number of |
| +// fields and the prefix has a minimum length. These values are chosen to avoid |
| +// stripping in cases such as two fields named "address1" and "address2", while |
| +// still being effective against web frameworks that prepend prefixes such as |
| +// "ctl01$ctl00$MainContentRegion$" to all fields. |
| +const int kCommonNamePrefixRemovalFieldThreshold = 3; |
| +const int kMinCommonNamePrefixLength = 10; |
| + |
| // Maximum number of characters in the field label to be encoded in a proto. |
| const int kMaxFieldLabelNumChars = 200; |
| @@ -319,6 +328,9 @@ FormStructure::FormStructure(const FormData& form) |
| base::SizeTToString16(++unique_names[field.name]); |
| fields_.push_back(new AutofillField(field, unique_name)); |
| } |
| + |
| + // Do further processing on the fields, as needed. |
| + ProcessExtractedFields(); |
| } |
| FormStructure::~FormStructure() {} |
| @@ -1241,4 +1253,48 @@ bool FormStructure::ShouldSkipField(const FormFieldData& field) const { |
| return field.is_checkable; |
| } |
| +void FormStructure::ProcessExtractedFields() { |
| + // Strip the prefix shared by all field names before heuristics parsing. |
| + // Forms with fewer fields than the threshold are left untouched. |
| + if (field_count() < kCommonNamePrefixRemovalFieldThreshold) |
| + return; |
| + |
| + // Find the longest prefix common to all names; give up if it is too short. |
| + std::vector<base::string16> names; |
| + names.reserve(field_count()); |
| + for (const AutofillField* field : *this) |
| + names.push_back(field->name); |
| + |
| + base::StringPiece16 longest_prefix = FindLongestCommonPrefix(names); |
| + if (longest_prefix.size() < kMinCommonNamePrefixLength) |
| + return; |
| + |
| + // Set each field's parseable name to its name with the prefix removed. |
| + for (AutofillField* field : *this) { |
| + base::string16 truncated = |
| + field->name.substr(longest_prefix.size(), field->name.size()); |
| + field->set_parseable_name(std::move(truncated)); |

vabr (Chromium)
2016/01/25 16:40:27
optional nit:
If you inline truncated, you will no
Mathieu
2016/01/25 18:06:28
Thanks!
|
| + } |
| +} |
| + |
| +// static. Returns the longest prefix shared by every string in |strings|. |
| +base::StringPiece16 FormStructure::FindLongestCommonPrefix( |
| + const std::vector<base::string16>& strings) { |
| + if (strings.empty()) |
| + return base::StringPiece16(); |
| + |
| + // Extend the prefix one character at a time; stop at the first position |
| + // where any string ends or differs from strings[0]. See http://goo.gl/YGukMM. |
| + for (size_t prefix_len = 0; prefix_len < strings[0].size(); prefix_len++) { |
| + for (size_t i = 1; i < strings.size(); i++) { |
| + if (prefix_len >= strings[i].size() || |
| + strings[i].at(prefix_len) != strings[0].at(prefix_len)) { |
| + // Mismatch: the result views the first prefix_len chars of strings[i]. |
| + return base::StringPiece16(strings[i].data(), prefix_len); |
| + } |
| + } |
| + } |
| + return strings[0]; |
| +} |
| + |
| } // namespace autofill |