OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/autofill/core/browser/address_field.h" | 5 #include "components/autofill/core/browser/address_field.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
11 #include "base/strings/string16.h" | 11 #include "base/strings/string16.h" |
12 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
13 #include "base/strings/utf_string_conversions.h" | 13 #include "base/strings/utf_string_conversions.h" |
14 #include "components/autofill/core/browser/autofill_field.h" | 14 #include "components/autofill/core/browser/autofill_field.h" |
15 #include "components/autofill/core/browser/autofill_regex_constants.h" | 15 #include "components/autofill/core/browser/autofill_regex_constants.h" |
16 #include "components/autofill/core/browser/autofill_scanner.h" | 16 #include "components/autofill/core/browser/autofill_scanner.h" |
17 #include "components/autofill/core/browser/field_types.h" | 17 #include "components/autofill/core/browser/field_types.h" |
18 | 18 |
19 using base::UTF8ToUTF16; | 19 using base::UTF8ToUTF16; |
20 | 20 |
21 namespace autofill { | 21 namespace autofill { |
22 | 22 |
| 23 namespace { |
| 24 |
| 25 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { |
| 26 *field = scanner->Cursor(); |
| 27 scanner->Advance(); |
| 28 return true; |
| 29 } |
| 30 |
| 31 } // namespace |
| 32 |
| 33 // Some sites use type="tel" for zip fields (to get a numerical input). |
| 34 // http://crbug.com/426958 |
| 35 const int AddressField::kZipCodeMatchType = |
| 36 MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER; |
| 37 |
| 38 // Select fields are allowed here. This occurs on top-100 site rediff.com. |
| 39 const int AddressField::kCityMatchType = MATCH_DEFAULT | MATCH_SELECT; |
| 40 |
| 41 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; |
| 42 |
23 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 43 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
24 if (scanner->IsEnd()) | 44 if (scanner->IsEnd()) |
25 return NULL; | 45 return NULL; |
26 | 46 |
27 scoped_ptr<AddressField> address_field(new AddressField); | 47 scoped_ptr<AddressField> address_field(new AddressField); |
28 const AutofillField* const initial_field = scanner->Cursor(); | 48 const AutofillField* const initial_field = scanner->Cursor(); |
29 size_t saved_cursor = scanner->SaveCursor(); | 49 size_t saved_cursor = scanner->SaveCursor(); |
30 | 50 |
31 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); | 51 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
32 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); | 52 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
33 | 53 |
34 // Allow address fields to appear in any order. | 54 // Allow address fields to appear in any order. |
35 size_t begin_trailing_non_labeled_fields = 0; | 55 size_t begin_trailing_non_labeled_fields = 0; |
36 bool has_trailing_non_labeled_fields = false; | 56 bool has_trailing_non_labeled_fields = false; |
37 while (!scanner->IsEnd()) { | 57 while (!scanner->IsEnd()) { |
38 const size_t cursor = scanner->SaveCursor(); | 58 const size_t cursor = scanner->SaveCursor(); |
39 if (address_field->ParseAddressLines(scanner) || | 59 if (address_field->ParseAddressLines(scanner) || |
40 address_field->ParseCity(scanner) || | 60 address_field->ParseCityStateZipCode(scanner) || |
41 address_field->ParseState(scanner) || | |
42 address_field->ParseZipCode(scanner) || | |
43 address_field->ParseCountry(scanner) || | 61 address_field->ParseCountry(scanner) || |
44 address_field->ParseCompany(scanner)) { | 62 address_field->ParseCompany(scanner)) { |
45 has_trailing_non_labeled_fields = false; | 63 has_trailing_non_labeled_fields = false; |
46 continue; | 64 continue; |
47 } else if (ParseField(scanner, attention_ignored, NULL) || | 65 } else if (ParseField(scanner, attention_ignored, NULL) || |
48 ParseField(scanner, region_ignored, NULL)) { | 66 ParseField(scanner, region_ignored, NULL)) { |
49 // We ignore the following: | 67 // We ignore the following: |
50 // * Attention. | 68 // * Attention. |
51 // * Province/Region/Other. | 69 // * Province/Region/Other. |
52 continue; | 70 continue; |
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
209 // The occasional page (e.g. google account registration page) calls this a | 227 // The occasional page (e.g. google account registration page) calls this a |
210 // "location". However, this only makes sense for select tags. | 228 // "location". However, this only makes sense for select tags. |
211 scanner->Rewind(); | 229 scanner->Rewind(); |
212 return ParseFieldSpecifics(scanner, | 230 return ParseFieldSpecifics(scanner, |
213 UTF8ToUTF16(kCountryLocationRe), | 231 UTF8ToUTF16(kCountryLocationRe), |
214 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, | 232 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, |
215 &country_); | 233 &country_); |
216 } | 234 } |
217 | 235 |
218 bool AddressField::ParseZipCode(AutofillScanner* scanner) { | 236 bool AddressField::ParseZipCode(AutofillScanner* scanner) { |
219 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
220 // is called a "post code". | |
221 if (zip_) | 237 if (zip_) |
222 return false; | 238 return false; |
223 | 239 |
224 // Some sites use type="tel" for zip fields (to get a numerical input). | |
225 // http://crbug.com/426958 | |
226 if (!ParseFieldSpecifics(scanner, | 240 if (!ParseFieldSpecifics(scanner, |
227 UTF8ToUTF16(kZipCodeRe), | 241 UTF8ToUTF16(kZipCodeRe), |
228 MATCH_DEFAULT | MATCH_TELEPHONE, | 242 kZipCodeMatchType, |
229 &zip_)) { | 243 &zip_)) { |
230 return false; | 244 return false; |
231 } | 245 } |
232 | 246 |
233 // Look for a zip+4, whose field name will also often contain | 247 // Look for a zip+4, whose field name will also often contain |
234 // the substring "zip". | 248 // the substring "zip". |
235 ParseFieldSpecifics(scanner, | 249 ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); |
236 UTF8ToUTF16(kZip4Re), | |
237 MATCH_DEFAULT | MATCH_TELEPHONE, | |
238 &zip4_); | |
239 return true; | 250 return true; |
240 } | 251 } |
241 | 252 |
242 bool AddressField::ParseCity(AutofillScanner* scanner) { | 253 bool AddressField::ParseCity(AutofillScanner* scanner) { |
243 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
244 // the term "town". | |
245 if (city_) | 254 if (city_) |
246 return false; | 255 return false; |
247 | 256 |
248 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
249 return ParseFieldSpecifics(scanner, | 257 return ParseFieldSpecifics(scanner, |
250 UTF8ToUTF16(kCityRe), | 258 UTF8ToUTF16(kCityRe), |
251 MATCH_DEFAULT | MATCH_SELECT, | 259 kCityMatchType, |
252 &city_); | 260 &city_); |
253 } | 261 } |
254 | 262 |
255 bool AddressField::ParseState(AutofillScanner* scanner) { | 263 bool AddressField::ParseState(AutofillScanner* scanner) { |
256 if (state_) | 264 if (state_) |
257 return false; | 265 return false; |
258 | 266 |
259 return ParseFieldSpecifics(scanner, | 267 return ParseFieldSpecifics(scanner, |
260 UTF8ToUTF16(kStateRe), | 268 UTF8ToUTF16(kStateRe), |
261 MATCH_DEFAULT | MATCH_SELECT, | 269 kStateMatchType, |
262 &state_); | 270 &state_); |
263 } | 271 } |
264 | 272 |
| 273 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { |
| 274 // Simple cases. |
| 275 if (scanner->IsEnd()) |
| 276 return false; |
| 277 if (city_ && state_ && zip_) |
| 278 return false; |
| 279 if (state_ && zip_) |
| 280 return ParseCity(scanner); |
| 281 if (city_ && zip_) |
| 282 return ParseState(scanner); |
| 283 if (city_ && state_) |
| 284 return ParseZipCode(scanner); |
| 285 |
| 286 // Check for matches to both name and label. |
| 287 ParseNameLabelResult city_result = ParseNameAndLabelForCity(scanner); |
| 288 if (city_result == RESULT_MATCH_NAME_LABEL) |
| 289 return true; |
| 290 ParseNameLabelResult state_result = ParseNameAndLabelForState(scanner); |
| 291 if (state_result == RESULT_MATCH_NAME_LABEL) |
| 292 return true; |
| 293 ParseNameLabelResult zip_result = ParseNameAndLabelForZipCode(scanner); |
| 294 if (zip_result == RESULT_MATCH_NAME_LABEL) |
| 295 return true; |
| 296 |
| 297 // Check if there is only one potential match. |
| 298 bool maybe_city = city_result != RESULT_MATCH_NONE; |
| 299 bool maybe_state = state_result != RESULT_MATCH_NONE; |
| 300 bool maybe_zip = zip_result != RESULT_MATCH_NONE; |
| 301 if (maybe_city && !maybe_state && !maybe_zip) |
| 302 return SetFieldAndAdvanceCursor(scanner, &city_); |
| 303 if (maybe_state && !maybe_city && !maybe_zip) |
| 304 return SetFieldAndAdvanceCursor(scanner, &state_); |
| 305 if (maybe_zip && !maybe_city && !maybe_state) |
| 306 return ParseZipCode(scanner); |
| 307 |
| 308 // Otherwise give name priority over label. |
| 309 if (city_result == RESULT_MATCH_NAME) |
| 310 return SetFieldAndAdvanceCursor(scanner, &city_); |
| 311 if (state_result == RESULT_MATCH_NAME) |
| 312 return SetFieldAndAdvanceCursor(scanner, &state_); |
| 313 if (zip_result == RESULT_MATCH_NAME) |
| 314 return ParseZipCode(scanner); |
| 315 |
| 316 if (city_result == RESULT_MATCH_LABEL) |
| 317 return SetFieldAndAdvanceCursor(scanner, &city_); |
| 318 if (state_result == RESULT_MATCH_LABEL) |
| 319 return SetFieldAndAdvanceCursor(scanner, &state_); |
| 320 if (zip_result == RESULT_MATCH_LABEL) |
| 321 return ParseZipCode(scanner); |
| 322 |
| 323 return false; |
| 324 } |
| 325 |
| 326 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( |
| 327 AutofillScanner* scanner) { |
| 328 if (zip_) |
| 329 return RESULT_MATCH_NONE; |
| 330 |
| 331 ParseNameLabelResult result = ParseNameAndLabelSeparately( |
| 332 scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); |
| 333 |
| 334 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) |
| 335 return result; |
| 336 |
| 337 size_t saved_cursor = scanner->SaveCursor(); |
| 338 bool found_non_zip4 = ParseCity(scanner); |
| 339 if (found_non_zip4) |
| 340 city_ = nullptr; |
| 341 scanner->RewindTo(saved_cursor); |
| 342 if (!found_non_zip4) { |
| 343 found_non_zip4 = ParseState(scanner); |
| 344 if (found_non_zip4) |
| 345 state_ = nullptr; |
| 346 scanner->RewindTo(saved_cursor); |
| 347 } |
| 348 |
| 349 if (!found_non_zip4) { |
| 350 // Look for a zip+4, whose field name will also often contain |
| 351 // the substring "zip". |
| 352 ParseFieldSpecifics(scanner, |
| 353 UTF8ToUTF16(kZip4Re), |
| 354 kZipCodeMatchType, |
| 355 &zip4_); |
| 356 } |
| 357 return result; |
| 358 } |
| 359 |
| 360 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( |
| 361 AutofillScanner* scanner) { |
| 362 if (city_) |
| 363 return RESULT_MATCH_NONE; |
| 364 |
| 365 return ParseNameAndLabelSeparately( |
| 366 scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); |
| 367 } |
| 368 |
| 369 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( |
| 370 AutofillScanner* scanner) { |
| 371 if (state_) |
| 372 return RESULT_MATCH_NONE; |
| 373 |
| 374 return ParseNameAndLabelSeparately( |
| 375 scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); |
| 376 } |
| 377 |
265 } // namespace autofill | 378 } // namespace autofill |
OLD | NEW |