Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Side by Side Diff: components/autofill/core/browser/form_structure.cc

Issue 1622073002: [Autofill] Remove longest common prefix from field names when running heuristics. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: tests Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/core/browser/form_structure.h" 5 #include "components/autofill/core/browser/form_structure.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <map> 9 #include <map>
10 #include <utility> 10 #include <utility>
(...skipping 30 matching lines...) Expand all
41 const char kBillingMode[] = "billing"; 41 const char kBillingMode[] = "billing";
42 const char kShippingMode[] = "shipping"; 42 const char kShippingMode[] = "shipping";
43 43
44 // Strip away >= 5 consecutive digits. 44 // Strip away >= 5 consecutive digits.
45 const char kIgnorePatternInFieldName[] = "\\d{5,}"; 45 const char kIgnorePatternInFieldName[] = "\\d{5,}";
46 46
47 // A form is considered to have a high prediction mismatch rate if the number of 47 // A form is considered to have a high prediction mismatch rate if the number of
48 // mismatches exceeds this threshold. 48 // mismatches exceeds this threshold.
49 const int kNumberOfMismatchesThreshold = 3; 49 const int kNumberOfMismatchesThreshold = 3;
50 50
51 // Only remove common name prefixes if the form has a minimum number of fields
52 // and the prefix has a minimum length. These values are chosen to avoid
53 // stripping in cases such as two fields named "address1" and "address2", while
54 // remaining effective against web frameworks which prepend prefixes such as
55 // "ctl01$ctl00$MainContentRegion$" to all fields.
56 const int kCommonNamePrefixRemovalFieldThreshold = 3;
57 const int kMinCommonNamePrefixLength = 10;
58
51 // Maximum number of characters in the field label to be encoded in a proto. 59 // Maximum number of characters in the field label to be encoded in a proto.
52 const int kMaxFieldLabelNumChars = 200; 60 const int kMaxFieldLabelNumChars = 200;
53 61
54 // Returns whether sending autofill field metadata to the server is enabled. 62 // Returns whether sending autofill field metadata to the server is enabled.
55 bool IsAutofillFieldMetadataEnabled() { 63 bool IsAutofillFieldMetadataEnabled() {
56 const std::string group_name = 64 const std::string group_name =
57 base::FieldTrialList::FindFullName("AutofillFieldMetadata"); 65 base::FieldTrialList::FindFullName("AutofillFieldMetadata");
58 return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE); 66 return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE);
59 } 67 }
60 68
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 has_password_field_ = true; 320 has_password_field_ = true;
313 321
314 // Generate a unique name for this field by appending a counter to the name. 322 // Generate a unique name for this field by appending a counter to the name.
315 // Make sure to prepend the counter with a non-numeric digit so that we are 323 // Make sure to prepend the counter with a non-numeric digit so that we are
316 // guaranteed to avoid collisions. 324 // guaranteed to avoid collisions.
317 base::string16 unique_name = 325 base::string16 unique_name =
318 field.name + base::ASCIIToUTF16("_") + 326 field.name + base::ASCIIToUTF16("_") +
319 base::SizeTToString16(++unique_names[field.name]); 327 base::SizeTToString16(++unique_names[field.name]);
320 fields_.push_back(new AutofillField(field, unique_name)); 328 fields_.push_back(new AutofillField(field, unique_name));
321 } 329 }
330
331 // Do further processing on the fields, as needed.
332 ProcessExtractedFields();
322 } 333 }
323 334
324 FormStructure::~FormStructure() {} 335 FormStructure::~FormStructure() {}
325 336
326 void FormStructure::DetermineHeuristicTypes() { 337 void FormStructure::DetermineHeuristicTypes() {
327 // First, try to detect field types based on each field's |autocomplete| 338 // First, try to detect field types based on each field's |autocomplete|
328 // attribute value. If there is at least one form field that specifies an 339 // attribute value. If there is at least one form field that specifies an
329 // autocomplete type hint, don't try to apply other heuristics to match fields 340 // autocomplete type hint, don't try to apply other heuristics to match fields
330 // in this form. 341 // in this form.
331 if (!was_parsed_for_autocomplete_attributes_) 342 if (!was_parsed_for_autocomplete_attributes_)
(...skipping 902 matching lines...) Expand 10 before | Expand all | Expand 10 after
1234 field->set_section(field->section() + "-cc"); 1245 field->set_section(field->section() + "-cc");
1235 else 1246 else
1236 field->set_section(field->section() + "-default"); 1247 field->set_section(field->section() + "-default");
1237 } 1248 }
1238 } 1249 }
1239 1250
1240 bool FormStructure::ShouldSkipField(const FormFieldData& field) const { 1251 bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
1241 return field.is_checkable; 1252 return field.is_checkable;
1242 } 1253 }
1243 1254
1255 void FormStructure::ProcessExtractedFields() {
1256 // Update the field name parsed by heuristics if several criteria are met.
1257 // Several fields must be present in the form.
1258 if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
1259 return;
1260
1261 // Find the longest common prefix within all the field names.
1262 std::vector<base::string16> names;
vabr (Chromium) 2016/01/25 10:10:25 optional: names.reserve(field_count());
Mathieu 2016/01/25 15:52:53 Done.
1263 for (const AutofillField* field : *this)
1264 names.push_back(field->name);
1265
1266 const base::string16 longest_prefix = FindLongestCommonPrefix(names);
1267 if (longest_prefix.size() < kMinCommonNamePrefixLength)
1268 return;
1269
1270 // The name without the prefix will be used for heuristics parsing.
1271 for (AutofillField* field : *this) {
1272 field->set_parseable_name(
1273 field->name.substr(longest_prefix.size(), field->name.size()));
1274 }
1275 }
1276
1277 // static
1278 base::string16 FormStructure::FindLongestCommonPrefix(
1279 const std::vector<base::string16>& strings) {
1280 // Go through each character of the first string until there is a mismatch at
1281 // the same position in any other string. Adapted from http://goo.gl/YGukMM.
1282 for (size_t prefix_len = 0; prefix_len < strings[0].size(); prefix_len++) {
1283 for (size_t i = 1; i < strings.size(); i++) {
1284 if (prefix_len >= strings[i].size() ||
1285 strings[i].at(prefix_len) != strings[0].at(prefix_len)) {
1286 // Mismatch found.
1287 return strings[i].substr(0, prefix_len);
1288 }
1289 }
1290 }
1291 return strings[0];
1292 }
1293
1244 } // namespace autofill 1294 } // namespace autofill
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698