Chromium Code Reviews| Index: components/autofill/core/browser/form_structure.cc |
| diff --git a/components/autofill/core/browser/form_structure.cc b/components/autofill/core/browser/form_structure.cc |
| index 73aeb1a6b95ca779ba62df604c7afb57efd9d10f..b2a4d6316ae2982bf970f78030010733c75426d7 100644 |
| --- a/components/autofill/core/browser/form_structure.cc |
| +++ b/components/autofill/core/browser/form_structure.cc |
| @@ -48,6 +48,14 @@ const char kIgnorePatternInFieldName[] = "\\d{5,}"; |
| // mismatches exceeds this threshold. |
| const int kNumberOfMismatchesThreshold = 3; |
| +// Only remove common name prefixes if the form has a minimum number of fields
| +// and the prefix has a minimum length. These values are chosen to avoid cases
| +// such as two fields named "address1" and "address2", while still being
| +// effective against web frameworks which prepend prefixes such as
| +// "ctl01$ctl00$MainContentRegion$" to all fields.
| +const int kCommonNamePrefixRemovalFieldThreshold = 3; |
| +const int kMinCommonNamePrefixLength = 10; |
| + |
| // Maximum number of characters in the field label to be encoded in a proto. |
| const int kMaxFieldLabelNumChars = 200; |
| @@ -319,6 +327,9 @@ FormStructure::FormStructure(const FormData& form) |
| base::SizeTToString16(++unique_names[field.name]); |
| fields_.push_back(new AutofillField(field, unique_name)); |
| } |
| + |
| + // Do further processing on the fields, as needed. |
| + ProcessExtractedFields(); |
| } |
| FormStructure::~FormStructure() {} |
| @@ -1241,4 +1252,43 @@ bool FormStructure::ShouldSkipField(const FormFieldData& field) const { |
| return field.is_checkable; |
| } |
| +// Post-processes the fields extracted from the form. When the form has at
| +// least kCommonNamePrefixRemovalFieldThreshold fields and all field names
| +// share a prefix of at least kMinCommonNamePrefixLength characters, that
| +// prefix is stripped and the remainder is stored as each field's parseable
| +// name. Otherwise the fields are left untouched.
| +void FormStructure::ProcessExtractedFields() {
| +  // Update the field name parsed by heuristics if several criteria are met.
| +  // Several fields must be present in the form.
| +  if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
| +    return;
| +
| +  // Find the longest common prefix within all the field names.
| +  std::vector<base::string16> names;

vabr (Chromium)
2016/01/25 10:10:25
optional: names.reserve(field_count());
Mathieu
2016/01/25 15:52:53
Done.
|
| +  // The number of names is known up front, so allocate the storage once.
| +  names.reserve(field_count());
| +  for (const AutofillField* field : *this)
| +    names.push_back(field->name);
| +
| +  const base::string16 longest_prefix = FindLongestCommonPrefix(names);
| +  if (longest_prefix.size() < kMinCommonNamePrefixLength)
| +    return;
| +
| +  // The name without the prefix will be used for heuristics parsing. Every
| +  // name starts with |longest_prefix|, so the offset is always in range.
| +  for (AutofillField* field : *this)
| +    field->set_parseable_name(field->name.substr(longest_prefix.size()));
| +}
| + |
| +// Returns the longest prefix shared by every entry of |strings|. Returns an
| +// empty string when |strings| is empty, and the single entry itself when
| +// |strings| has exactly one element.
| +// static
| +base::string16 FormStructure::FindLongestCommonPrefix(
| +    const std::vector<base::string16>& strings) {
| +  // Without this guard, strings[0] below would index an empty vector.
| +  if (strings.empty())
| +    return base::string16();
| +
| +  // Go through each character of the first string until there is a mismatch at
| +  // the same position in any other string. Adapted from http://goo.gl/YGukMM.
| +  const base::string16& first = strings[0];
| +  for (size_t prefix_len = 0; prefix_len < first.size(); ++prefix_len) {
| +    for (size_t i = 1; i < strings.size(); ++i) {
| +      // The length check comes first so the index below is always in range.
| +      if (prefix_len >= strings[i].size() ||
| +          strings[i][prefix_len] != first[prefix_len]) {
| +        // Mismatch found. All strings agree on the first |prefix_len|
| +        // characters, so taking them from |first| is equivalent.
| +        return first.substr(0, prefix_len);
| +      }
| +    }
| +  }
| +  // Every other string starts with |first|, so |first| is the common prefix.
| +  return first;
| +}
| + |
| } // namespace autofill |