OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/autofill/core/browser/form_structure.h" | 5 #include "components/autofill/core/browser/form_structure.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <algorithm> | |
9 #include <map> | 10 #include <map> |
10 #include <utility> | 11 #include <utility> |
11 | 12 |
12 #include "base/command_line.h" | 13 #include "base/command_line.h" |
13 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
14 #include "base/logging.h" | 15 #include "base/logging.h" |
15 #include "base/memory/scoped_ptr.h" | 16 #include "base/memory/scoped_ptr.h" |
16 #include "base/metrics/field_trial.h" | 17 #include "base/metrics/field_trial.h" |
17 #include "base/sha1.h" | 18 #include "base/sha1.h" |
18 #include "base/strings/string_number_conversions.h" | 19 #include "base/strings/string_number_conversions.h" |
(...skipping 22 matching lines...) Expand all Loading... | |
41 const char kBillingMode[] = "billing"; | 42 const char kBillingMode[] = "billing"; |
42 const char kShippingMode[] = "shipping"; | 43 const char kShippingMode[] = "shipping"; |
43 | 44 |
44 // Strip away >= 5 consecutive digits. | 45 // Strip away >= 5 consecutive digits. |
45 const char kIgnorePatternInFieldName[] = "\\d{5,}"; | 46 const char kIgnorePatternInFieldName[] = "\\d{5,}"; |
46 | 47 |
47 // A form is considered to have a high prediction mismatch rate if the number of | 48 // A form is considered to have a high prediction mismatch rate if the number of |
48 // mismatches exceeds this threshold. | 49 // mismatches exceeds this threshold. |
49 const int kNumberOfMismatchesThreshold = 3; | 50 const int kNumberOfMismatchesThreshold = 3; |
50 | 51 |
52 // Only remove common name prefixes if we have a minimum number of fields and | |
53 // a minimum prefix length. These values are chosen to avoid cases such as two | |
54 // fields with "address1" and "address2" and be effective against web frameworks | |
55 // which prepend prefixes such as "ctl01$ctl00$MainContentRegion$" on all | |
56 // fields. | |
57 const int kCommonNamePrefixRemovalFieldThreshold = 3; | |
58 const int kMinCommonNamePrefixLength = 10; | |
59 | |
51 // Maximum number of characters in the field label to be encoded in a proto. | 60 // Maximum number of characters in the field label to be encoded in a proto. |
52 const int kMaxFieldLabelNumChars = 200; | 61 const int kMaxFieldLabelNumChars = 200; |
53 | 62 |
54 // Returns whether sending autofill field metadata to the server is enabled. | 63 // Returns whether sending autofill field metadata to the server is enabled. |
55 bool IsAutofillFieldMetadataEnabled() { | 64 bool IsAutofillFieldMetadataEnabled() { |
// Fetch the group name recorded for the "AutofillFieldMetadata" field trial.
56 const std::string group_name = | 65 const std::string group_name = |
57 base::FieldTrialList::FindFullName("AutofillFieldMetadata"); | 66 base::FieldTrialList::FindFullName("AutofillFieldMetadata"); |
// Any group whose name begins with "Enabled" (case-sensitive match) counts as
// enabled, so suffixed group names are accepted as well.
58 return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE); | 67 return base::StartsWith(group_name, "Enabled", base::CompareCase::SENSITIVE); |
59 } | 68 } |
60 | 69 |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
312 has_password_field_ = true; | 321 has_password_field_ = true; |
313 | 322 |
314 // Generate a unique name for this field by appending a counter to the name. | 323 // Generate a unique name for this field by appending a counter to the name. |
315 // Make sure to prepend the counter with a non-numeric digit so that we are | 324 // Make sure to prepend the counter with a non-numeric digit so that we are |
316 // guaranteed to avoid collisions. | 325 // guaranteed to avoid collisions. |
317 base::string16 unique_name = | 326 base::string16 unique_name = |
318 field.name + base::ASCIIToUTF16("_") + | 327 field.name + base::ASCIIToUTF16("_") + |
319 base::SizeTToString16(++unique_names[field.name]); | 328 base::SizeTToString16(++unique_names[field.name]); |
320 fields_.push_back(new AutofillField(field, unique_name)); | 329 fields_.push_back(new AutofillField(field, unique_name)); |
321 } | 330 } |
331 | |
332 // Do further processing on the fields, as needed. | |
333 ProcessExtractedFields(); | |
322 } | 334 } |
323 | 335 |
// Destructor with an intentionally empty body.
// NOTE(review): the constructor pushes raw `new AutofillField` pointers into
// |fields_|; presumably that container owns and deletes them -- confirm.
324 FormStructure::~FormStructure() {} | 336 FormStructure::~FormStructure() {} |
325 | 337 |
326 void FormStructure::DetermineHeuristicTypes() { | 338 void FormStructure::DetermineHeuristicTypes() { |
327 // First, try to detect field types based on each field's |autocomplete| | 339 // First, try to detect field types based on each field's |autocomplete| |
328 // attribute value. If there is at least one form field that specifies an | 340 // attribute value. If there is at least one form field that specifies an |
329 // autocomplete type hint, don't try to apply other heuristics to match fields | 341 // autocomplete type hint, don't try to apply other heuristics to match fields |
330 // in this form. | 342 // in this form. |
331 if (!was_parsed_for_autocomplete_attributes_) | 343 if (!was_parsed_for_autocomplete_attributes_) |
(...skipping 902 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1234 field->set_section(field->section() + "-cc"); | 1246 field->set_section(field->section() + "-cc"); |
1235 else | 1247 else |
1236 field->set_section(field->section() + "-default"); | 1248 field->set_section(field->section() + "-default"); |
1237 } | 1249 } |
1238 } | 1250 } |
1239 | 1251 |
// Reports whether |field| is to be skipped. The only criterion applied here is
// the field's |is_checkable| flag, which is returned unchanged.
1240 bool FormStructure::ShouldSkipField(const FormFieldData& field) const { | 1252 bool FormStructure::ShouldSkipField(const FormFieldData& field) const { |
1241 return field.is_checkable; | 1253 return field.is_checkable; |
1242 } | 1254 } |
1243 | 1255 |
1256 void FormStructure::ProcessExtractedFields() { | |
1257 // Update the field name parsed by heuristics if several criteria are met. | |
1258 // Several fields must be present in the form. | |
1259 if (field_count() < kCommonNamePrefixRemovalFieldThreshold) | |
1260 return; | |
1261 | |
1262 // Find the longest common prefix within all the field names. | |
1263 std::vector<base::string16> names; | |
1264 names.reserve(field_count()); | |
1265 for (const AutofillField* field : *this) | |
1266 names.push_back(field->name); | |
1267 | |
1268 base::StringPiece16 longest_prefix = FindLongestCommonPrefix(names); | |
1269 if (longest_prefix.size() < kMinCommonNamePrefixLength) | |
1270 return; | |
1271 | |
1272 // The name without the prefix will be used for heuristics parsing. | |
1273 for (AutofillField* field : *this) { | |
1274 base::string16 truncated = | |
1275 field->name.substr(longest_prefix.size(), field->name.size()); | |
1276 field->set_parseable_name(std::move(truncated)); | |
vabr (Chromium)
2016/01/25 16:40:27
optional nit:
If you inline truncated, you will no
Mathieu
2016/01/25 18:06:28
Thanks!
| |
1277 } | |
1278 } | |
1279 | |
1280 // static | |
1281 base::StringPiece16 FormStructure::FindLongestCommonPrefix( | |
1282 const std::vector<base::string16>& strings) { | |
1283 if (strings.empty()) | |
1284 return base::StringPiece16(); | |
1285 | |
1286 // Go through each character of the first string until there is a mismatch at | |
1287 // the same position in any other string. Adapted from http://goo.gl/YGukMM. | |
1288 for (size_t prefix_len = 0; prefix_len < strings[0].size(); prefix_len++) { | |
1289 for (size_t i = 1; i < strings.size(); i++) { | |
1290 if (prefix_len >= strings[i].size() || | |
1291 strings[i].at(prefix_len) != strings[0].at(prefix_len)) { | |
1292 // Mismatch found. | |
1293 return base::StringPiece16(strings[i].data(), prefix_len); | |
1294 } | |
1295 } | |
1296 } | |
1297 return strings[0]; | |
1298 } | |
1299 | |
1244 } // namespace autofill | 1300 } // namespace autofill |
OLD | NEW |