| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/autofill/core/browser/address_field.h" | 5 #include "components/autofill/core/browser/address_field.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "base/strings/string16.h" | 11 #include "base/strings/string16.h" |
| 12 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
| 13 #include "base/strings/utf_string_conversions.h" | 13 #include "base/strings/utf_string_conversions.h" |
| 14 #include "components/autofill/core/browser/autofill_field.h" | 14 #include "components/autofill/core/browser/autofill_field.h" |
| 15 #include "components/autofill/core/browser/autofill_regex_constants.h" | 15 #include "components/autofill/core/browser/autofill_regex_constants.h" |
| 16 #include "components/autofill/core/browser/autofill_scanner.h" | 16 #include "components/autofill/core/browser/autofill_scanner.h" |
| 17 #include "components/autofill/core/browser/field_types.h" | 17 #include "components/autofill/core/browser/field_types.h" |
| 18 | 18 |
| 19 using base::UTF8ToUTF16; | 19 using base::UTF8ToUTF16; |
| 20 | 20 |
| 21 namespace autofill { | 21 namespace autofill { |
| 22 | 22 |
| 23 namespace { |
| 24 |
| 25 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { |
| 26 *field = scanner->Cursor(); |
| 27 scanner->Advance(); |
| 28 return true; |
| 29 } |
| 30 |
| 31 } // namespace |
| 32 |
| 33 // Some sites use type="tel" for zip fields (to get a numerical input). |
| 34 // http://crbug.com/426958 |
| 35 const int AddressField::kZipCodeMatchType = |
| 36 MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER; |
| 37 |
| 38 // Select fields are allowed here. This occurs on top-100 site rediff.com. |
| 39 const int AddressField::kCityMatchType = MATCH_DEFAULT | MATCH_SELECT; |
| 40 |
| 41 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; |
| 42 |
| 23 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 43 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
| 24 if (scanner->IsEnd()) | 44 if (scanner->IsEnd()) |
| 25 return NULL; | 45 return NULL; |
| 26 | 46 |
| 27 scoped_ptr<AddressField> address_field(new AddressField); | 47 scoped_ptr<AddressField> address_field(new AddressField); |
| 28 const AutofillField* const initial_field = scanner->Cursor(); | 48 const AutofillField* const initial_field = scanner->Cursor(); |
| 29 size_t saved_cursor = scanner->SaveCursor(); | 49 size_t saved_cursor = scanner->SaveCursor(); |
| 30 | 50 |
| 31 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); | 51 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
| 32 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); | 52 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
| 33 | 53 |
| 34 // Allow address fields to appear in any order. | 54 // Allow address fields to appear in any order. |
| 35 size_t begin_trailing_non_labeled_fields = 0; | 55 size_t begin_trailing_non_labeled_fields = 0; |
| 36 bool has_trailing_non_labeled_fields = false; | 56 bool has_trailing_non_labeled_fields = false; |
| 37 while (!scanner->IsEnd()) { | 57 while (!scanner->IsEnd()) { |
| 38 const size_t cursor = scanner->SaveCursor(); | 58 const size_t cursor = scanner->SaveCursor(); |
| 39 if (address_field->ParseAddressLines(scanner) || | 59 if (address_field->ParseAddressLines(scanner) || |
| 40 address_field->ParseCity(scanner) || | 60 address_field->ParseCityStateZipCode(scanner) || |
| 41 address_field->ParseState(scanner) || | |
| 42 address_field->ParseZipCode(scanner) || | |
| 43 address_field->ParseCountry(scanner) || | 61 address_field->ParseCountry(scanner) || |
| 44 address_field->ParseCompany(scanner)) { | 62 address_field->ParseCompany(scanner)) { |
| 45 has_trailing_non_labeled_fields = false; | 63 has_trailing_non_labeled_fields = false; |
| 46 continue; | 64 continue; |
| 47 } else if (ParseField(scanner, attention_ignored, NULL) || | 65 } else if (ParseField(scanner, attention_ignored, NULL) || |
| 48 ParseField(scanner, region_ignored, NULL)) { | 66 ParseField(scanner, region_ignored, NULL)) { |
| 49 // We ignore the following: | 67 // We ignore the following: |
| 50 // * Attention. | 68 // * Attention. |
| 51 // * Province/Region/Other. | 69 // * Province/Region/Other. |
| 52 continue; | 70 continue; |
| (... 156 matching lines skipped ...) | (... 156 matching lines skipped ...) |
| 209 // The occasional page (e.g. google account registration page) calls this a | 227 // The occasional page (e.g. google account registration page) calls this a |
| 210 // "location". However, this only makes sense for select tags. | 228 // "location". However, this only makes sense for select tags. |
| 211 scanner->Rewind(); | 229 scanner->Rewind(); |
| 212 return ParseFieldSpecifics(scanner, | 230 return ParseFieldSpecifics(scanner, |
| 213 UTF8ToUTF16(kCountryLocationRe), | 231 UTF8ToUTF16(kCountryLocationRe), |
| 214 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, | 232 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, |
| 215 &country_); | 233 &country_); |
| 216 } | 234 } |
| 217 | 235 |
| 218 bool AddressField::ParseZipCode(AutofillScanner* scanner) { | 236 bool AddressField::ParseZipCode(AutofillScanner* scanner) { |
| 219 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
| 220 // is called a "post code". | |
| 221 if (zip_) | 237 if (zip_) |
| 222 return false; | 238 return false; |
| 223 | 239 |
| 224 // Some sites use type="tel" for zip fields (to get a numerical input). | |
| 225 // http://crbug.com/426958 | |
| 226 if (!ParseFieldSpecifics(scanner, | 240 if (!ParseFieldSpecifics(scanner, |
| 227 UTF8ToUTF16(kZipCodeRe), | 241 UTF8ToUTF16(kZipCodeRe), |
| 228 MATCH_DEFAULT | MATCH_TELEPHONE, | 242 kZipCodeMatchType, |
| 229 &zip_)) { | 243 &zip_)) { |
| 230 return false; | 244 return false; |
| 231 } | 245 } |
| 232 | 246 |
| 233 // Look for a zip+4, whose field name will also often contain | 247 // Look for a zip+4, whose field name will also often contain |
| 234 // the substring "zip". | 248 // the substring "zip". |
| 235 ParseFieldSpecifics(scanner, | 249 ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); |
| 236 UTF8ToUTF16(kZip4Re), | |
| 237 MATCH_DEFAULT | MATCH_TELEPHONE, | |
| 238 &zip4_); | |
| 239 return true; | 250 return true; |
| 240 } | 251 } |
| 241 | 252 |
| 242 bool AddressField::ParseCity(AutofillScanner* scanner) { | 253 bool AddressField::ParseCity(AutofillScanner* scanner) { |
| 243 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
| 244 // the term "town". | |
| 245 if (city_) | 254 if (city_) |
| 246 return false; | 255 return false; |
| 247 | 256 |
| 248 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
| 249 return ParseFieldSpecifics(scanner, | 257 return ParseFieldSpecifics(scanner, |
| 250 UTF8ToUTF16(kCityRe), | 258 UTF8ToUTF16(kCityRe), |
| 251 MATCH_DEFAULT | MATCH_SELECT, | 259 kCityMatchType, |
| 252 &city_); | 260 &city_); |
| 253 } | 261 } |
| 254 | 262 |
| 255 bool AddressField::ParseState(AutofillScanner* scanner) { | 263 bool AddressField::ParseState(AutofillScanner* scanner) { |
| 256 if (state_) | 264 if (state_) |
| 257 return false; | 265 return false; |
| 258 | 266 |
| 259 return ParseFieldSpecifics(scanner, | 267 return ParseFieldSpecifics(scanner, |
| 260 UTF8ToUTF16(kStateRe), | 268 UTF8ToUTF16(kStateRe), |
| 261 MATCH_DEFAULT | MATCH_SELECT, | 269 kStateMatchType, |
| 262 &state_); | 270 &state_); |
| 263 } | 271 } |
| 264 | 272 |
| 273 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { |
| 274 // Simple cases. |
| 275 if (scanner->IsEnd()) |
| 276 return false; |
| 277 if (city_ && state_ && zip_) |
| 278 return false; |
| 279 if (state_ && zip_) |
| 280 return ParseCity(scanner); |
| 281 if (city_ && zip_) |
| 282 return ParseState(scanner); |
| 283 if (city_ && state_) |
| 284 return ParseZipCode(scanner); |
| 285 |
| 286 // Check for matches to both name and label. |
| 287 ParseNameLabelResult city_result = ParseNameAndLabelForCity(scanner); |
| 288 if (city_result == RESULT_MATCH_NAME_LABEL) |
| 289 return true; |
| 290 ParseNameLabelResult state_result = ParseNameAndLabelForState(scanner); |
| 291 if (state_result == RESULT_MATCH_NAME_LABEL) |
| 292 return true; |
| 293 ParseNameLabelResult zip_result = ParseNameAndLabelForZipCode(scanner); |
| 294 if (zip_result == RESULT_MATCH_NAME_LABEL) |
| 295 return true; |
| 296 |
| 297 // Check if there is only one potential match. |
| 298 bool maybe_city = city_result != RESULT_MATCH_NONE; |
| 299 bool maybe_state = state_result != RESULT_MATCH_NONE; |
| 300 bool maybe_zip = zip_result != RESULT_MATCH_NONE; |
| 301 if (maybe_city && !maybe_state && !maybe_zip) |
| 302 return SetFieldAndAdvanceCursor(scanner, &city_); |
| 303 if (maybe_state && !maybe_city && !maybe_zip) |
| 304 return SetFieldAndAdvanceCursor(scanner, &state_); |
| 305 if (maybe_zip && !maybe_city && !maybe_state) |
| 306 return ParseZipCode(scanner); |
| 307 |
| 308 // Otherwise give name priority over label. |
| 309 if (city_result == RESULT_MATCH_NAME) |
| 310 return SetFieldAndAdvanceCursor(scanner, &city_); |
| 311 if (state_result == RESULT_MATCH_NAME) |
| 312 return SetFieldAndAdvanceCursor(scanner, &state_); |
| 313 if (zip_result == RESULT_MATCH_NAME) |
| 314 return ParseZipCode(scanner); |
| 315 |
| 316 if (city_result == RESULT_MATCH_LABEL) |
| 317 return SetFieldAndAdvanceCursor(scanner, &city_); |
| 318 if (state_result == RESULT_MATCH_LABEL) |
| 319 return SetFieldAndAdvanceCursor(scanner, &state_); |
| 320 if (zip_result == RESULT_MATCH_LABEL) |
| 321 return ParseZipCode(scanner); |
| 322 |
| 323 return false; |
| 324 } |
| 325 |
| 326 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( |
| 327 AutofillScanner* scanner) { |
| 328 if (zip_) |
| 329 return RESULT_MATCH_NONE; |
| 330 |
| 331 ParseNameLabelResult result = ParseNameAndLabelSeparately( |
| 332 scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); |
| 333 |
| 334 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) |
| 335 return result; |
| 336 |
| 337 size_t saved_cursor = scanner->SaveCursor(); |
| 338 bool found_non_zip4 = ParseCity(scanner); |
| 339 if (found_non_zip4) |
| 340 city_ = nullptr; |
| 341 scanner->RewindTo(saved_cursor); |
| 342 if (!found_non_zip4) { |
| 343 found_non_zip4 = ParseState(scanner); |
| 344 if (found_non_zip4) |
| 345 state_ = nullptr; |
| 346 scanner->RewindTo(saved_cursor); |
| 347 } |
| 348 |
| 349 if (!found_non_zip4) { |
| 350 // Look for a zip+4, whose field name will also often contain |
| 351 // the substring "zip". |
| 352 ParseFieldSpecifics(scanner, |
| 353 UTF8ToUTF16(kZip4Re), |
| 354 kZipCodeMatchType, |
| 355 &zip4_); |
| 356 } |
| 357 return result; |
| 358 } |
| 359 |
| 360 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( |
| 361 AutofillScanner* scanner) { |
| 362 if (city_) |
| 363 return RESULT_MATCH_NONE; |
| 364 |
| 365 return ParseNameAndLabelSeparately( |
| 366 scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); |
| 367 } |
| 368 |
| 369 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( |
| 370 AutofillScanner* scanner) { |
| 371 if (state_) |
| 372 return RESULT_MATCH_NONE; |
| 373 |
| 374 return ParseNameAndLabelSeparately( |
| 375 scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); |
| 376 } |
| 377 |
| 265 } // namespace autofill | 378 } // namespace autofill |
| OLD | NEW |