Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/autofill/core/browser/address_field.h" | 5 #include "components/autofill/core/browser/address_field.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "base/strings/string16.h" | 11 #include "base/strings/string16.h" |
| 12 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
| 13 #include "base/strings/utf_string_conversions.h" | 13 #include "base/strings/utf_string_conversions.h" |
| 14 #include "components/autofill/core/browser/autofill_field.h" | 14 #include "components/autofill/core/browser/autofill_field.h" |
| 15 #include "components/autofill/core/browser/autofill_regex_constants.h" | 15 #include "components/autofill/core/browser/autofill_regex_constants.h" |
| 16 #include "components/autofill/core/browser/autofill_scanner.h" | 16 #include "components/autofill/core/browser/autofill_scanner.h" |
| 17 #include "components/autofill/core/browser/field_types.h" | 17 #include "components/autofill/core/browser/field_types.h" |
| 18 | 18 |
| 19 using base::UTF8ToUTF16; | 19 using base::UTF8ToUTF16; |
| 20 | 20 |
| 21 namespace autofill { | 21 namespace autofill { |
| 22 | 22 |
| 23 namespace { | |
| 24 | |
| 25 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { | |
| 26 *field = scanner->Cursor(); | |
| 27 scanner->Advance(); | |
| 28 return true; | |
| 29 } | |
| 30 | |
| 31 } // namespace | |
| 32 | |
| 33 // Some sites use type="tel" for zip fields (to get a numerical input). | |
| 34 // http://crbug.com/426958 | |
| 35 // static | |
|
Evan Stade
2015/03/24 00:04:32
don't think the // static notation is useful for c…
Lei Zhang
2015/03/25 00:42:28
Done.
| |
| 36 const int AddressField::kZipCodeMatchType = | |
| 37 MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER; | |
| 38 | |
| 39 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
| 40 // static | |
| 41 const int AddressField::kCityMatchType = MATCH_DEFAULT | MATCH_SELECT; | |
| 42 | |
| 43 // static | |
| 44 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; | |
| 45 | |
| 23 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 46 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
| 24 if (scanner->IsEnd()) | 47 if (scanner->IsEnd()) |
| 25 return NULL; | 48 return NULL; |
| 26 | 49 |
| 27 scoped_ptr<AddressField> address_field(new AddressField); | 50 scoped_ptr<AddressField> address_field(new AddressField); |
| 28 const AutofillField* const initial_field = scanner->Cursor(); | 51 const AutofillField* const initial_field = scanner->Cursor(); |
| 29 size_t saved_cursor = scanner->SaveCursor(); | 52 size_t saved_cursor = scanner->SaveCursor(); |
| 30 | 53 |
| 31 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); | 54 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
| 32 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); | 55 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
| 33 | 56 |
| 34 // Allow address fields to appear in any order. | 57 // Allow address fields to appear in any order. |
| 35 size_t begin_trailing_non_labeled_fields = 0; | 58 size_t begin_trailing_non_labeled_fields = 0; |
| 36 bool has_trailing_non_labeled_fields = false; | 59 bool has_trailing_non_labeled_fields = false; |
| 37 while (!scanner->IsEnd()) { | 60 while (!scanner->IsEnd()) { |
| 38 const size_t cursor = scanner->SaveCursor(); | 61 const size_t cursor = scanner->SaveCursor(); |
| 39 if (address_field->ParseAddressLines(scanner) || | 62 if (address_field->ParseAddressLines(scanner) || |
| 40 address_field->ParseCity(scanner) || | 63 address_field->ParseCityStateZipCode(scanner) || |
| 41 address_field->ParseState(scanner) || | |
| 42 address_field->ParseZipCode(scanner) || | |
| 43 address_field->ParseCountry(scanner) || | 64 address_field->ParseCountry(scanner) || |
| 44 address_field->ParseCompany(scanner)) { | 65 address_field->ParseCompany(scanner)) { |
| 45 has_trailing_non_labeled_fields = false; | 66 has_trailing_non_labeled_fields = false; |
| 46 continue; | 67 continue; |
| 47 } else if (ParseField(scanner, attention_ignored, NULL) || | 68 } else if (ParseField(scanner, attention_ignored, NULL) || |
| 48 ParseField(scanner, region_ignored, NULL)) { | 69 ParseField(scanner, region_ignored, NULL)) { |
| 49 // We ignore the following: | 70 // We ignore the following: |
| 50 // * Attention. | 71 // * Attention. |
| 51 // * Province/Region/Other. | 72 // * Province/Region/Other. |
| 52 continue; | 73 continue; |
| (...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 214 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, | 235 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, |
| 215 &country_); | 236 &country_); |
| 216 } | 237 } |
| 217 | 238 |
| 218 bool AddressField::ParseZipCode(AutofillScanner* scanner) { | 239 bool AddressField::ParseZipCode(AutofillScanner* scanner) { |
| 219 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | 240 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
| 220 // is called a "post code". | 241 // is called a "post code". |
| 221 if (zip_) | 242 if (zip_) |
| 222 return false; | 243 return false; |
| 223 | 244 |
| 224 // Some sites use type="tel" for zip fields (to get a numerical input). | |
| 225 // http://crbug.com/426958 | |
| 226 if (!ParseFieldSpecifics(scanner, | 245 if (!ParseFieldSpecifics(scanner, |
| 227 UTF8ToUTF16(kZipCodeRe), | 246 UTF8ToUTF16(kZipCodeRe), |
| 228 MATCH_DEFAULT | MATCH_TELEPHONE, | 247 kZipCodeMatchType, |
| 229 &zip_)) { | 248 &zip_)) { |
| 230 return false; | 249 return false; |
| 231 } | 250 } |
| 232 | 251 |
| 233 // Look for a zip+4, whose field name will also often contain | 252 // Look for a zip+4, whose field name will also often contain |
| 234 // the substring "zip". | 253 // the substring "zip". |
| 235 ParseFieldSpecifics(scanner, | 254 ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); |
| 236 UTF8ToUTF16(kZip4Re), | |
| 237 MATCH_DEFAULT | MATCH_TELEPHONE, | |
| 238 &zip4_); | |
| 239 return true; | 255 return true; |
| 240 } | 256 } |
| 241 | 257 |
| 242 bool AddressField::ParseCity(AutofillScanner* scanner) { | 258 bool AddressField::ParseCity(AutofillScanner* scanner) { |
| 243 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | 259 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
| 244 // the term "town". | 260 // the term "town". |
| 245 if (city_) | 261 if (city_) |
| 246 return false; | 262 return false; |
| 247 | 263 |
| 248 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
| 249 return ParseFieldSpecifics(scanner, | 264 return ParseFieldSpecifics(scanner, |
| 250 UTF8ToUTF16(kCityRe), | 265 UTF8ToUTF16(kCityRe), |
| 251 MATCH_DEFAULT | MATCH_SELECT, | 266 kCityMatchType, |
| 252 &city_); | 267 &city_); |
| 253 } | 268 } |
| 254 | 269 |
| 255 bool AddressField::ParseState(AutofillScanner* scanner) { | 270 bool AddressField::ParseState(AutofillScanner* scanner) { |
| 256 if (state_) | 271 if (state_) |
| 257 return false; | 272 return false; |
| 258 | 273 |
| 259 return ParseFieldSpecifics(scanner, | 274 return ParseFieldSpecifics(scanner, |
| 260 UTF8ToUTF16(kStateRe), | 275 UTF8ToUTF16(kStateRe), |
| 261 MATCH_DEFAULT | MATCH_SELECT, | 276 kStateMatchType, |
| 262 &state_); | 277 &state_); |
| 263 } | 278 } |
| 264 | 279 |
| 280 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { | |
| 281 // Simple cases. | |
| 282 if (scanner->IsEnd()) | |
| 283 return false; | |
| 284 if (city_ && state_ && zip_) | |
| 285 return false; | |
| 286 if (state_ && zip_) | |
| 287 return ParseCity(scanner); | |
| 288 if (city_ && zip_) | |
| 289 return ParseState(scanner); | |
| 290 if (city_ && state_) | |
| 291 return ParseZipCode(scanner); | |
| 292 | |
| 293 // Check for name + label matches. | |
|
Evan Stade
2015/03/24 00:04:32
nit: "Check for matches to both name and label."
Lei Zhang
2015/03/25 00:42:28
Done.
| |
| 294 ParseNameLabelResult city_result = ParseNameAndLabelForCity(scanner); | |
| 295 if (city_result == RESULT_MATCH_NAME_LABEL) | |
| 296 return true; | |
| 297 ParseNameLabelResult state_result = ParseNameAndLabelForState(scanner); | |
| 298 if (state_result == RESULT_MATCH_NAME_LABEL) | |
| 299 return true; | |
| 300 ParseNameLabelResult zip_result = ParseNameAndLabelForZipCode(scanner); | |
| 301 if (zip_result == RESULT_MATCH_NAME_LABEL) | |
| 302 return true; | |
| 303 | |
| 304 // Check if there is only one potential match. | |
| 305 bool maybe_city = (city_result != RESULT_MATCH_NONE); | |
|
Evan Stade
2015/03/24 00:04:32
nit: remove excess parens
Lei Zhang
2015/03/25 00:42:28
Done.
| |
| 306 bool maybe_state = (state_result != RESULT_MATCH_NONE); | |
| 307 bool maybe_zip = (zip_result != RESULT_MATCH_NONE); | |
| 308 if (maybe_city && !maybe_state && !maybe_zip) | |
| 309 return SetFieldAndAdvanceCursor(scanner, &city_); | |
| 310 if (maybe_state && !maybe_city && !maybe_zip) | |
| 311 return SetFieldAndAdvanceCursor(scanner, &state_); | |
| 312 if (maybe_zip && !maybe_city && !maybe_state) | |
| 313 return ParseZipCode(scanner); | |
| 314 | |
| 315 // Otherwise give name priority over label. | |
| 316 if (city_result == RESULT_MATCH_NAME) | |
| 317 return SetFieldAndAdvanceCursor(scanner, &city_); | |
| 318 if (state_result == RESULT_MATCH_NAME) | |
| 319 return SetFieldAndAdvanceCursor(scanner, &state_); | |
| 320 if (zip_result == RESULT_MATCH_NAME) | |
| 321 return ParseZipCode(scanner); | |
| 322 | |
| 323 if (city_result == RESULT_MATCH_LABEL) | |
| 324 return SetFieldAndAdvanceCursor(scanner, &city_); | |
| 325 if (state_result == RESULT_MATCH_LABEL) | |
| 326 return SetFieldAndAdvanceCursor(scanner, &state_); | |
| 327 if (zip_result == RESULT_MATCH_LABEL) | |
| 328 return ParseZipCode(scanner); | |
| 329 | |
| 330 return false; | |
| 331 } | |
| 332 | |
| 333 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( | |
| 334 AutofillScanner* scanner) { | |
| 335 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
| 336 // is called a "post code". | |
| 337 if (zip_) | |
| 338 return RESULT_MATCH_NONE; | |
| 339 | |
| 340 ParseNameLabelResult result = ParseNameAndLabelSeparately( | |
| 341 scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); | |
| 342 | |
| 343 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) | |
| 344 return result; | |
| 345 | |
| 346 size_t saved_cursor = scanner->SaveCursor(); | |
| 347 bool found_non_zip4 = ParseCity(scanner); | |
| 348 if (found_non_zip4) | |
| 349 city_ = nullptr; | |
| 350 scanner->RewindTo(saved_cursor); | |
| 351 if (!found_non_zip4) { | |
| 352 found_non_zip4 = ParseState(scanner); | |
| 353 if (found_non_zip4) | |
| 354 state_ = nullptr; | |
| 355 scanner->RewindTo(saved_cursor); | |
| 356 } | |
| 357 | |
| 358 if (!found_non_zip4) { | |
| 359 // Look for a zip+4, whose field name will also often contain | |
| 360 // the substring "zip". | |
| 361 ParseFieldSpecifics(scanner, | |
| 362 UTF8ToUTF16(kZip4Re), | |
| 363 kZipCodeMatchType, | |
| 364 &zip4_); | |
| 365 } | |
| 366 return result; | |
| 367 } | |
| 368 | |
| 369 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( | |
| 370 AutofillScanner* scanner) { | |
| 371 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
|
Evan Stade
2015/03/24 00:04:32
I dunno where this comment belongs, but not here
Lei Zhang
2015/03/25 00:42:28
That's because it started out as a copy + paste…
| |
| 372 // the term "town". | |
| 373 if (city_) | |
| 374 return RESULT_MATCH_NONE; | |
| 375 | |
| 376 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
|
Evan Stade
2015/03/24 00:04:32
repeated comment
Lei Zhang
2015/03/25 00:42:28
deleted
| |
| 377 return ParseNameAndLabelSeparately( | |
| 378 scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); | |
| 379 } | |
| 380 | |
| 381 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( | |
| 382 AutofillScanner* scanner) { | |
| 383 if (state_) | |
| 384 return RESULT_MATCH_NONE; | |
| 385 | |
| 386 return ParseNameAndLabelSeparately( | |
| 387 scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); | |
| 388 } | |
| 389 | |
| 265 } // namespace autofill | 390 } // namespace autofill |
| OLD | NEW |