Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(389)

Side by Side Diff: components/autofill/core/browser/form_structure.cc

Issue 1622073002: [Autofill] Remove longest common prefix from field names when running heuristics. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: std::move Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/core/browser/form_structure.h" 5 #include "components/autofill/core/browser/form_structure.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <algorithm>
9 #include <map> 10 #include <map>
10 #include <utility> 11 #include <utility>
11 12
12 #include "base/command_line.h" 13 #include "base/command_line.h"
13 #include "base/i18n/case_conversion.h" 14 #include "base/i18n/case_conversion.h"
14 #include "base/logging.h" 15 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h" 16 #include "base/memory/scoped_ptr.h"
16 #include "base/metrics/field_trial.h" 17 #include "base/metrics/field_trial.h"
17 #include "base/sha1.h" 18 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h" 19 #include "base/strings/string_number_conversions.h"
(...skipping 22 matching lines...) Expand all
41 const char kBillingMode[] = "billing"; 42 const char kBillingMode[] = "billing";
42 const char kShippingMode[] = "shipping"; 43 const char kShippingMode[] = "shipping";
43 44
44 // Strip away >= 5 consecutive digits. 45 // Strip away >= 5 consecutive digits.
45 const char kIgnorePatternInFieldName[] = "\\d{5,}"; 46 const char kIgnorePatternInFieldName[] = "\\d{5,}";
46 47
47 // A form is considered to have a high prediction mismatch rate if the number of 48 // A form is considered to have a high prediction mismatch rate if the number of
48 // mismatches exceeds this threshold. 49 // mismatches exceeds this threshold.
49 const int kNumberOfMismatchesThreshold = 3; 50 const int kNumberOfMismatchesThreshold = 3;
50 51
52 // Common name prefixes are only removed if the form has a minimum number of
53 // fields and the prefix has a minimum length. These values are chosen to avoid
54 // cases such as two fields with "address1" and "address2" while remaining
55 // effective against web frameworks which prepend prefixes such as
56 // "ctl01$ctl00$MainContentRegion$" to all fields.
57 const int kCommonNamePrefixRemovalFieldThreshold = 3;
58 const int kMinCommonNamePrefixLength = 10;
59
51 // Maximum number of characters in the field label to be encoded in a proto. 60 // Maximum number of characters in the field label to be encoded in a proto.
52 const int kMaxFieldLabelNumChars = 200; 61 const int kMaxFieldLabelNumChars = 200;
53 62
54 // Returns whether sending autofill field metadata to the server is enabled. 63 // Returns whether sending autofill field metadata to the server is enabled.
55 bool IsAutofillFieldMetadataEnabled() { 64 bool IsAutofillFieldMetadataEnabled() {
56 const std::string group_name = 65 const std::string group_name =
57 base::FieldTrialList::FindFullName("AutofillFieldMetadata"); 66 base::FieldTrialList::FindFullName("AutofillFieldMetadata");
58 return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE); 67 return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE);
59 } 68 }
60 69
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 has_password_field_ = true; 321 has_password_field_ = true;
313 322
314 // Generate a unique name for this field by appending a counter to the name. 323 // Generate a unique name for this field by appending a counter to the name.
315 // Make sure to prepend the counter with a non-numeric digit so that we are 324 // Make sure to prepend the counter with a non-numeric digit so that we are
316 // guaranteed to avoid collisions. 325 // guaranteed to avoid collisions.
317 base::string16 unique_name = 326 base::string16 unique_name =
318 field.name + base::ASCIIToUTF16("_") + 327 field.name + base::ASCIIToUTF16("_") +
319 base::SizeTToString16(++unique_names[field.name]); 328 base::SizeTToString16(++unique_names[field.name]);
320 fields_.push_back(new AutofillField(field, unique_name)); 329 fields_.push_back(new AutofillField(field, unique_name));
321 } 330 }
331
332 // Do further processing on the fields, as needed.
333 ProcessExtractedFields();
322 } 334 }
323 335
324 FormStructure::~FormStructure() {} 336 FormStructure::~FormStructure() {}
325 337
326 void FormStructure::DetermineHeuristicTypes() { 338 void FormStructure::DetermineHeuristicTypes() {
327 // First, try to detect field types based on each field's |autocomplete| 339 // First, try to detect field types based on each field's |autocomplete|
328 // attribute value. If there is at least one form field that specifies an 340 // attribute value. If there is at least one form field that specifies an
329 // autocomplete type hint, don't try to apply other heuristics to match fields 341 // autocomplete type hint, don't try to apply other heuristics to match fields
330 // in this form. 342 // in this form.
331 if (!was_parsed_for_autocomplete_attributes_) 343 if (!was_parsed_for_autocomplete_attributes_)
(...skipping 902 matching lines...) Expand 10 before | Expand all | Expand 10 after
1234 field->set_section(field->section() + "-cc"); 1246 field->set_section(field->section() + "-cc");
1235 else 1247 else
1236 field->set_section(field->section() + "-default"); 1248 field->set_section(field->section() + "-default");
1237 } 1249 }
1238 } 1250 }
1239 1251
1240 bool FormStructure::ShouldSkipField(const FormFieldData& field) const { 1252 bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
1241 return field.is_checkable; 1253 return field.is_checkable;
1242 } 1254 }
1243 1255
1256 void FormStructure::ProcessExtractedFields() {
1257 // Update the field name parsed by heuristics if several criteria are met.
1258 // Several fields must be present in the form.
1259 if (field_count() < kCommonNamePrefixRemovalFieldThreshold)
1260 return;
1261
1262 // Find the longest common prefix within all the field names.
1263 std::vector<base::string16> names;
1264 names.reserve(field_count());
1265 for (const AutofillField* field : *this)
1266 names.push_back(field->name);
1267
1268 base::StringPiece16 longest_prefix = FindLongestCommonPrefix(names);
1269 if (longest_prefix.size() < kMinCommonNamePrefixLength)
1270 return;
1271
1272 // The name without the prefix will be used for heuristics parsing.
1273 for (AutofillField* field : *this) {
1274 field->set_parseable_name(
1275 field->name.substr(longest_prefix.size(), field->name.size()));
1276 }
1277 }
1278
1279 // static
1280 base::StringPiece16 FormStructure::FindLongestCommonPrefix(
1281 const std::vector<base::string16>& strings) {
1282 if (strings.empty())
1283 return base::StringPiece16();
1284
1285 // Go through each character of the first string until there is a mismatch at
1286 // the same position in any other string. Adapted from http://goo.gl/YGukMM.
1287 for (size_t prefix_len = 0; prefix_len < strings[0].size(); prefix_len++) {
1288 for (size_t i = 1; i < strings.size(); i++) {
1289 if (prefix_len >= strings[i].size() ||
1290 strings[i].at(prefix_len) != strings[0].at(prefix_len)) {
1291 // Mismatch found.
1292 return base::StringPiece16(strings[i].data(), prefix_len);
1293 }
1294 }
1295 }
1296 return strings[0];
1297 }
1298
1244 } // namespace autofill 1299 } // namespace autofill
OLDNEW
« no previous file with comments | « components/autofill/core/browser/form_structure.h ('k') | components/autofill/core/browser/form_structure_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698