OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/autofill/core/browser/address_field.h" | 5 #include "components/autofill/core/browser/address_field.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
11 #include "base/strings/string16.h" | 11 #include "base/strings/string16.h" |
12 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
13 #include "base/strings/utf_string_conversions.h" | 13 #include "base/strings/utf_string_conversions.h" |
14 #include "components/autofill/core/browser/autofill_field.h" | 14 #include "components/autofill/core/browser/autofill_field.h" |
15 #include "components/autofill/core/browser/autofill_regex_constants.h" | 15 #include "components/autofill/core/browser/autofill_regex_constants.h" |
16 #include "components/autofill/core/browser/autofill_scanner.h" | 16 #include "components/autofill/core/browser/autofill_scanner.h" |
17 #include "components/autofill/core/browser/field_types.h" | 17 #include "components/autofill/core/browser/field_types.h" |
18 | 18 |
19 using base::UTF8ToUTF16; | 19 using base::UTF8ToUTF16; |
20 | 20 |
21 namespace autofill { | 21 namespace autofill { |
22 | 22 |
23 namespace { | |
24 | |
25 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { | |
26 *field = scanner->Cursor(); | |
27 scanner->Advance(); | |
28 return true; | |
29 } | |
30 | |
31 } // namespace | |
32 | |
33 // Some sites use type="tel" for zip fields (to get a numerical input). | |
34 // http://crbug.com/426958 | |
35 // static | |
Evan Stade
2015/03/24 00:04:32
don't think the // static notation is useful for c
Lei Zhang
2015/03/25 00:42:28
Done.
| |
36 const int AddressField::kZipCodeMatchType = | |
37 MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER; | |
38 | |
39 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
40 // static | |
41 const int AddressField::kCityMatchType = MATCH_DEFAULT | MATCH_SELECT; | |
42 | |
43 // static | |
44 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; | |
45 | |
23 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 46 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
24 if (scanner->IsEnd()) | 47 if (scanner->IsEnd()) |
25 return NULL; | 48 return NULL; |
26 | 49 |
27 scoped_ptr<AddressField> address_field(new AddressField); | 50 scoped_ptr<AddressField> address_field(new AddressField); |
28 const AutofillField* const initial_field = scanner->Cursor(); | 51 const AutofillField* const initial_field = scanner->Cursor(); |
29 size_t saved_cursor = scanner->SaveCursor(); | 52 size_t saved_cursor = scanner->SaveCursor(); |
30 | 53 |
31 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); | 54 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
32 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); | 55 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
33 | 56 |
34 // Allow address fields to appear in any order. | 57 // Allow address fields to appear in any order. |
35 size_t begin_trailing_non_labeled_fields = 0; | 58 size_t begin_trailing_non_labeled_fields = 0; |
36 bool has_trailing_non_labeled_fields = false; | 59 bool has_trailing_non_labeled_fields = false; |
37 while (!scanner->IsEnd()) { | 60 while (!scanner->IsEnd()) { |
38 const size_t cursor = scanner->SaveCursor(); | 61 const size_t cursor = scanner->SaveCursor(); |
39 if (address_field->ParseAddressLines(scanner) || | 62 if (address_field->ParseAddressLines(scanner) || |
40 address_field->ParseCity(scanner) || | 63 address_field->ParseCityStateZipCode(scanner) || |
41 address_field->ParseState(scanner) || | |
42 address_field->ParseZipCode(scanner) || | |
43 address_field->ParseCountry(scanner) || | 64 address_field->ParseCountry(scanner) || |
44 address_field->ParseCompany(scanner)) { | 65 address_field->ParseCompany(scanner)) { |
45 has_trailing_non_labeled_fields = false; | 66 has_trailing_non_labeled_fields = false; |
46 continue; | 67 continue; |
47 } else if (ParseField(scanner, attention_ignored, NULL) || | 68 } else if (ParseField(scanner, attention_ignored, NULL) || |
48 ParseField(scanner, region_ignored, NULL)) { | 69 ParseField(scanner, region_ignored, NULL)) { |
49 // We ignore the following: | 70 // We ignore the following: |
50 // * Attention. | 71 // * Attention. |
51 // * Province/Region/Other. | 72 // * Province/Region/Other. |
52 continue; | 73 continue; |
(...skipping 161 matching lines...) | |
214 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, | 235 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, |
215 &country_); | 236 &country_); |
216 } | 237 } |
217 | 238 |
218 bool AddressField::ParseZipCode(AutofillScanner* scanner) { | 239 bool AddressField::ParseZipCode(AutofillScanner* scanner) { |
219 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | 240 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
220 // is called a "post code". | 241 // is called a "post code". |
221 if (zip_) | 242 if (zip_) |
222 return false; | 243 return false; |
223 | 244 |
224 // Some sites use type="tel" for zip fields (to get a numerical input). | |
225 // http://crbug.com/426958 | |
226 if (!ParseFieldSpecifics(scanner, | 245 if (!ParseFieldSpecifics(scanner, |
227 UTF8ToUTF16(kZipCodeRe), | 246 UTF8ToUTF16(kZipCodeRe), |
228 MATCH_DEFAULT | MATCH_TELEPHONE, | 247 kZipCodeMatchType, |
229 &zip_)) { | 248 &zip_)) { |
230 return false; | 249 return false; |
231 } | 250 } |
232 | 251 |
233 // Look for a zip+4, whose field name will also often contain | 252 // Look for a zip+4, whose field name will also often contain |
234 // the substring "zip". | 253 // the substring "zip". |
235 ParseFieldSpecifics(scanner, | 254 ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); |
236 UTF8ToUTF16(kZip4Re), | |
237 MATCH_DEFAULT | MATCH_TELEPHONE, | |
238 &zip4_); | |
239 return true; | 255 return true; |
240 } | 256 } |
241 | 257 |
242 bool AddressField::ParseCity(AutofillScanner* scanner) { | 258 bool AddressField::ParseCity(AutofillScanner* scanner) { |
243 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | 259 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
244 // the term "town". | 260 // the term "town". |
245 if (city_) | 261 if (city_) |
246 return false; | 262 return false; |
247 | 263 |
248 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
249 return ParseFieldSpecifics(scanner, | 264 return ParseFieldSpecifics(scanner, |
250 UTF8ToUTF16(kCityRe), | 265 UTF8ToUTF16(kCityRe), |
251 MATCH_DEFAULT | MATCH_SELECT, | 266 kCityMatchType, |
252 &city_); | 267 &city_); |
253 } | 268 } |
254 | 269 |
255 bool AddressField::ParseState(AutofillScanner* scanner) { | 270 bool AddressField::ParseState(AutofillScanner* scanner) { |
256 if (state_) | 271 if (state_) |
257 return false; | 272 return false; |
258 | 273 |
259 return ParseFieldSpecifics(scanner, | 274 return ParseFieldSpecifics(scanner, |
260 UTF8ToUTF16(kStateRe), | 275 UTF8ToUTF16(kStateRe), |
261 MATCH_DEFAULT | MATCH_SELECT, | 276 kStateMatchType, |
262 &state_); | 277 &state_); |
263 } | 278 } |
264 | 279 |
280 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { | |
281 // Simple cases. | |
282 if (scanner->IsEnd()) | |
283 return false; | |
284 if (city_ && state_ && zip_) | |
285 return false; | |
286 if (state_ && zip_) | |
287 return ParseCity(scanner); | |
288 if (city_ && zip_) | |
289 return ParseState(scanner); | |
290 if (city_ && state_) | |
291 return ParseZipCode(scanner); | |
292 | |
293 // Check for name + label matches. | |
Evan Stade
2015/03/24 00:04:32
nit: "Check for matches to both name and label."
Lei Zhang
2015/03/25 00:42:28
Done.
| |
294 ParseNameLabelResult city_result = ParseNameAndLabelForCity(scanner); | |
295 if (city_result == RESULT_MATCH_NAME_LABEL) | |
296 return true; | |
297 ParseNameLabelResult state_result = ParseNameAndLabelForState(scanner); | |
298 if (state_result == RESULT_MATCH_NAME_LABEL) | |
299 return true; | |
300 ParseNameLabelResult zip_result = ParseNameAndLabelForZipCode(scanner); | |
301 if (zip_result == RESULT_MATCH_NAME_LABEL) | |
302 return true; | |
303 | |
304 // Check if there is only one potential match. | |
305 bool maybe_city = (city_result != RESULT_MATCH_NONE); | |
Evan Stade
2015/03/24 00:04:32
nit: remove excess parens
Lei Zhang
2015/03/25 00:42:28
Done.
| |
306 bool maybe_state = (state_result != RESULT_MATCH_NONE); | |
307 bool maybe_zip = (zip_result != RESULT_MATCH_NONE); | |
308 if (maybe_city && !maybe_state && !maybe_zip) | |
309 return SetFieldAndAdvanceCursor(scanner, &city_); | |
310 if (maybe_state && !maybe_city && !maybe_zip) | |
311 return SetFieldAndAdvanceCursor(scanner, &state_); | |
312 if (maybe_zip && !maybe_city && !maybe_state) | |
313 return ParseZipCode(scanner); | |
314 | |
315 // Otherwise give name priority over label. | |
316 if (city_result == RESULT_MATCH_NAME) | |
317 return SetFieldAndAdvanceCursor(scanner, &city_); | |
318 if (state_result == RESULT_MATCH_NAME) | |
319 return SetFieldAndAdvanceCursor(scanner, &state_); | |
320 if (zip_result == RESULT_MATCH_NAME) | |
321 return ParseZipCode(scanner); | |
322 | |
323 if (city_result == RESULT_MATCH_LABEL) | |
324 return SetFieldAndAdvanceCursor(scanner, &city_); | |
325 if (state_result == RESULT_MATCH_LABEL) | |
326 return SetFieldAndAdvanceCursor(scanner, &state_); | |
327 if (zip_result == RESULT_MATCH_LABEL) | |
328 return ParseZipCode(scanner); | |
329 | |
330 return false; | |
331 } | |
332 | |
333 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( | |
334 AutofillScanner* scanner) { | |
335 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
336 // is called a "post code". | |
337 if (zip_) | |
338 return RESULT_MATCH_NONE; | |
339 | |
340 ParseNameLabelResult result = ParseNameAndLabelSeparately( | |
341 scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); | |
342 | |
343 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) | |
344 return result; | |
345 | |
346 size_t saved_cursor = scanner->SaveCursor(); | |
347 bool found_non_zip4 = ParseCity(scanner); | |
348 if (found_non_zip4) | |
349 city_ = nullptr; | |
350 scanner->RewindTo(saved_cursor); | |
351 if (!found_non_zip4) { | |
352 found_non_zip4 = ParseState(scanner); | |
353 if (found_non_zip4) | |
354 state_ = nullptr; | |
355 scanner->RewindTo(saved_cursor); | |
356 } | |
357 | |
358 if (!found_non_zip4) { | |
359 // Look for a zip+4, whose field name will also often contain | |
360 // the substring "zip". | |
361 ParseFieldSpecifics(scanner, | |
362 UTF8ToUTF16(kZip4Re), | |
363 kZipCodeMatchType, | |
364 &zip4_); | |
365 } | |
366 return result; | |
367 } | |
368 | |
369 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( | |
370 AutofillScanner* scanner) { | |
371 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
Evan Stade
2015/03/24 00:04:32
I dunno where this comment belongs, but not here
Lei Zhang
2015/03/25 00:42:28
That's because it started out as a copy + paste
| |
372 // the term "town". | |
373 if (city_) | |
374 return RESULT_MATCH_NONE; | |
375 | |
376 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
Evan Stade
2015/03/24 00:04:32
repeated comment
Lei Zhang
2015/03/25 00:42:28
deleted
| |
377 return ParseNameAndLabelSeparately( | |
378 scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); | |
379 } | |
380 | |
381 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( | |
382 AutofillScanner* scanner) { | |
383 if (state_) | |
384 return RESULT_MATCH_NONE; | |
385 | |
386 return ParseNameAndLabelSeparately( | |
387 scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); | |
388 } | |
389 | |
265 } // namespace autofill | 390 } // namespace autofill |
OLD | NEW |