OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/autofill/core/browser/address_field.h" | 5 #include "components/autofill/core/browser/address_field.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
11 #include "base/strings/string16.h" | 11 #include "base/strings/string16.h" |
12 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
13 #include "base/strings/utf_string_conversions.h" | 13 #include "base/strings/utf_string_conversions.h" |
14 #include "components/autofill/core/browser/autofill_field.h" | 14 #include "components/autofill/core/browser/autofill_field.h" |
15 #include "components/autofill/core/browser/autofill_regex_constants.h" | 15 #include "components/autofill/core/browser/autofill_regex_constants.h" |
16 #include "components/autofill/core/browser/autofill_scanner.h" | 16 #include "components/autofill/core/browser/autofill_scanner.h" |
17 #include "components/autofill/core/browser/field_types.h" | 17 #include "components/autofill/core/browser/field_types.h" |
18 | 18 |
19 using base::ASCIIToUTF16; | |
19 using base::UTF8ToUTF16; | 20 using base::UTF8ToUTF16; |
Ilya Sherman
2015/11/26 02:25:09
Are these needed?
tfarina
2015/11/26 14:22:27
Done.
| |
20 | 21 |
21 namespace autofill { | 22 namespace autofill { |
22 | 23 |
23 namespace { | 24 namespace { |
24 | 25 |
25 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { | 26 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { |
26 *field = scanner->Cursor(); | 27 *field = scanner->Cursor(); |
27 scanner->Advance(); | 28 scanner->Advance(); |
28 return true; | 29 return true; |
29 } | 30 } |
(...skipping 11 matching lines...) Expand all Loading... | |
41 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; | 42 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; |
42 | 43 |
43 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 44 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
44 if (scanner->IsEnd()) | 45 if (scanner->IsEnd()) |
45 return NULL; | 46 return NULL; |
46 | 47 |
47 scoped_ptr<AddressField> address_field(new AddressField); | 48 scoped_ptr<AddressField> address_field(new AddressField); |
48 const AutofillField* const initial_field = scanner->Cursor(); | 49 const AutofillField* const initial_field = scanner->Cursor(); |
49 size_t saved_cursor = scanner->SaveCursor(); | 50 size_t saved_cursor = scanner->SaveCursor(); |
50 | 51 |
51 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); | |
52 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); | |
53 | |
54 // Allow address fields to appear in any order. | 52 // Allow address fields to appear in any order. |
55 size_t begin_trailing_non_labeled_fields = 0; | 53 size_t begin_trailing_non_labeled_fields = 0; |
56 bool has_trailing_non_labeled_fields = false; | 54 bool has_trailing_non_labeled_fields = false; |
57 while (!scanner->IsEnd()) { | 55 while (!scanner->IsEnd()) { |
58 const size_t cursor = scanner->SaveCursor(); | 56 const size_t cursor = scanner->SaveCursor(); |
59 if (address_field->ParseAddressLines(scanner) || | 57 if (address_field->ParseAddressLines(scanner) || |
60 address_field->ParseCityStateZipCode(scanner) || | 58 address_field->ParseCityStateZipCode(scanner) || |
61 address_field->ParseCountry(scanner) || | 59 address_field->ParseCountry(scanner) || |
62 address_field->ParseCompany(scanner)) { | 60 address_field->ParseCompany(scanner)) { |
63 has_trailing_non_labeled_fields = false; | 61 has_trailing_non_labeled_fields = false; |
64 continue; | 62 continue; |
65 } else if (ParseField(scanner, attention_ignored, NULL) || | 63 } else if (ParseField(scanner, kAttentionIgnoredRe, NULL) || |
66 ParseField(scanner, region_ignored, NULL)) { | 64 ParseField(scanner, kRegionIgnoredRe, NULL)) { |
67 // We ignore the following: | 65 // We ignore the following: |
68 // * Attention. | 66 // * Attention. |
69 // * Province/Region/Other. | 67 // * Province/Region/Other. |
70 continue; | 68 continue; |
71 } else if (scanner->Cursor() != initial_field && | 69 } else if (scanner->Cursor() != initial_field && |
72 ParseEmptyLabel(scanner, NULL)) { | 70 ParseEmptyLabel(scanner, NULL)) { |
73 // Ignore non-labeled fields within an address; the page | 71 // Ignore non-labeled fields within an address; the page |
74 // MapQuest Driving Directions North America.html contains such a field. | 72 // MapQuest Driving Directions North America.html contains such a field. |
75 // We only ignore such fields after we've parsed at least one other field; | 73 // We only ignore such fields after we've parsed at least one other field; |
76 // otherwise we'd effectively parse address fields before other field | 74 // otherwise we'd effectively parse address fields before other field |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
141 AddClassification(city_, ADDRESS_HOME_CITY, map) && | 139 AddClassification(city_, ADDRESS_HOME_CITY, map) && |
142 AddClassification(state_, ADDRESS_HOME_STATE, map) && | 140 AddClassification(state_, ADDRESS_HOME_STATE, map) && |
143 AddClassification(zip_, ADDRESS_HOME_ZIP, map) && | 141 AddClassification(zip_, ADDRESS_HOME_ZIP, map) && |
144 AddClassification(country_, ADDRESS_HOME_COUNTRY, map); | 142 AddClassification(country_, ADDRESS_HOME_COUNTRY, map); |
145 } | 143 } |
146 | 144 |
147 bool AddressField::ParseCompany(AutofillScanner* scanner) { | 145 bool AddressField::ParseCompany(AutofillScanner* scanner) { |
148 if (company_ && !company_->IsEmpty()) | 146 if (company_ && !company_->IsEmpty()) |
149 return false; | 147 return false; |
150 | 148 |
151 return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_); | 149 return ParseField(scanner, kCompanyRe, &company_); |
152 } | 150 } |
153 | 151 |
154 bool AddressField::ParseAddressLines(AutofillScanner* scanner) { | 152 bool AddressField::ParseAddressLines(AutofillScanner* scanner) { |
155 // We only match the string "address" in page text, not in element names, | 153 // We only match the string "address" in page text, not in element names, |
156 // because sometimes every element in a group of address fields will have | 154 // because sometimes every element in a group of address fields will have |
157 // a name containing the string "address"; for example, on the page | 155 // a name containing the string "address"; for example, on the page |
158 // Kohl's - Register Billing Address.html the text element labeled "city" | 156 // Kohl's - Register Billing Address.html the text element labeled "city" |
159 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | 157 // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
160 // such as "address1", which appear as element names on various pages (eg | 158 // such as "address1", which appear as element names on various pages (eg |
161 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | 159 // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
162 // EBay Registration Enter Information.html). | 160 // EBay Registration Enter Information.html). |
163 if (address1_ || street_address_) | 161 if (address1_ || street_address_) |
164 return false; | 162 return false; |
165 | 163 |
166 // Ignore "Address Lookup" field. http://crbug.com/427622 | 164 // Ignore "Address Lookup" field. http://crbug.com/427622 |
167 if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL)) | 165 if (ParseField(scanner, kAddressLookupRe, NULL)) |
168 return false; | 166 return false; |
169 | 167 |
170 base::string16 pattern = UTF8ToUTF16(kAddressLine1Re); | 168 if (!ParseFieldSpecifics(scanner, kAddressLine1Re, MATCH_DEFAULT, |
171 base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe); | |
172 if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) && | |
173 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
174 &address1_) && | 169 &address1_) && |
175 !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA, | 170 !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, |
176 &street_address_) && | 171 MATCH_LABEL | MATCH_TEXT, &address1_) && |
177 !ParseFieldSpecifics(scanner, label_pattern, | 172 !ParseFieldSpecifics(scanner, kAddressLine1Re, |
178 MATCH_LABEL | MATCH_TEXT_AREA, | 173 MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) && |
179 &street_address_)) | 174 !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, |
175 MATCH_LABEL | MATCH_TEXT_AREA, &street_address_)) | |
180 return false; | 176 return false; |
181 | 177 |
182 if (street_address_) | 178 if (street_address_) |
183 return true; | 179 return true; |
184 | 180 |
185 // This code may not pick up pages that have an address field consisting of a | 181 // This code may not pick up pages that have an address field consisting of a |
186 // sequence of unlabeled address fields. If we need to add this, see | 182 // sequence of unlabeled address fields. If we need to add this, see |
187 // discussion on https://codereview.chromium.org/741493003/ | 183 // discussion on https://codereview.chromium.org/741493003/ |
188 pattern = UTF8ToUTF16(kAddressLine2Re); | 184 if (!ParseField(scanner, kAddressLine2Re, &address2_) && |
189 label_pattern = UTF8ToUTF16(kAddressLine2LabelRe); | 185 !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, |
190 if (!ParseField(scanner, pattern, &address2_) && | 186 MATCH_LABEL | MATCH_TEXT, &address2_)) |
191 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
192 &address2_)) | |
193 return true; | 187 return true; |
194 | 188 |
195 // Optionally parse address line 3. This uses the same label regexp as | 189 // Optionally parse address line 3. This uses the same label regexp as |
196 // address 2 above. | 190 // address 2 above. |
197 pattern = UTF8ToUTF16(kAddressLinesExtraRe); | 191 if (!ParseField(scanner, kAddressLinesExtraRe, &address3_) && |
198 if (!ParseField(scanner, pattern, &address3_) && | 192 !ParseFieldSpecifics(scanner, kAddressLinesExtraRe, |
Ilya Sherman
2015/11/26 02:25:09
Please use kAddressLine2LabelRe here, as the code
tfarina
2015/11/26 14:22:27
Oops, that is what happens when you try to make pat
| |
199 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | 193 MATCH_LABEL | MATCH_TEXT, &address3_)) |
200 &address3_)) | |
201 return true; | 194 return true; |
202 | 195 |
203 // Try for surplus lines, which we will promptly discard. Some pages have 4 | 196 // Try for surplus lines, which we will promptly discard. Some pages have 4 |
204 // address lines (e.g. uk/ShoesDirect2.html)! | 197 // address lines (e.g. uk/ShoesDirect2.html)! |
205 // | 198 // |
206 // Since these are rare, don't bother considering unlabeled lines as extra | 199 // Since these are rare, don't bother considering unlabeled lines as extra |
207 // address lines. | 200 // address lines. |
208 pattern = UTF8ToUTF16(kAddressLinesExtraRe); | 201 while (ParseField(scanner, kAddressLinesExtraRe, NULL)) { |
209 while (ParseField(scanner, pattern, NULL)) { | |
210 // Consumed a surplus line, try for another. | 202 // Consumed a surplus line, try for another. |
211 } | 203 } |
212 return true; | 204 return true; |
213 } | 205 } |
214 | 206 |
215 bool AddressField::ParseCountry(AutofillScanner* scanner) { | 207 bool AddressField::ParseCountry(AutofillScanner* scanner) { |
216 if (country_ && !country_->IsEmpty()) | 208 if (country_ && !country_->IsEmpty()) |
217 return false; | 209 return false; |
218 | 210 |
219 scanner->SaveCursor(); | 211 scanner->SaveCursor(); |
220 if (ParseFieldSpecifics(scanner, | 212 if (ParseFieldSpecifics(scanner, kCountryRe, MATCH_DEFAULT | MATCH_SELECT, |
221 UTF8ToUTF16(kCountryRe), | |
222 MATCH_DEFAULT | MATCH_SELECT, | |
223 &country_)) { | 213 &country_)) { |
224 return true; | 214 return true; |
225 } | 215 } |
226 | 216 |
227 // The occasional page (e.g. google account registration page) calls this a | 217 // The occasional page (e.g. google account registration page) calls this a |
228 // "location". However, this only makes sense for select tags. | 218 // "location". However, this only makes sense for select tags. |
229 scanner->Rewind(); | 219 scanner->Rewind(); |
230 return ParseFieldSpecifics(scanner, | 220 return ParseFieldSpecifics(scanner, kCountryLocationRe, |
231 UTF8ToUTF16(kCountryLocationRe), | |
232 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, | 221 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, |
233 &country_); | 222 &country_); |
234 } | 223 } |
235 | 224 |
236 bool AddressField::ParseZipCode(AutofillScanner* scanner) { | 225 bool AddressField::ParseZipCode(AutofillScanner* scanner) { |
237 if (zip_) | 226 if (zip_) |
238 return false; | 227 return false; |
239 | 228 |
240 if (!ParseFieldSpecifics(scanner, | 229 if (!ParseFieldSpecifics(scanner, kZipCodeRe, kZipCodeMatchType, &zip_)) { |
241 UTF8ToUTF16(kZipCodeRe), | |
242 kZipCodeMatchType, | |
243 &zip_)) { | |
244 return false; | 230 return false; |
245 } | 231 } |
246 | 232 |
247 // Look for a zip+4, whose field name will also often contain | 233 // Look for a zip+4, whose field name will also often contain |
248 // the substring "zip". | 234 // the substring "zip". |
249 ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); | 235 ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); |
250 return true; | 236 return true; |
251 } | 237 } |
252 | 238 |
253 bool AddressField::ParseCity(AutofillScanner* scanner) { | 239 bool AddressField::ParseCity(AutofillScanner* scanner) { |
254 if (city_) | 240 if (city_) |
255 return false; | 241 return false; |
256 | 242 |
257 return ParseFieldSpecifics(scanner, | 243 return ParseFieldSpecifics(scanner, kCityRe, kCityMatchType, &city_); |
258 UTF8ToUTF16(kCityRe), | |
259 kCityMatchType, | |
260 &city_); | |
261 } | 244 } |
262 | 245 |
263 bool AddressField::ParseState(AutofillScanner* scanner) { | 246 bool AddressField::ParseState(AutofillScanner* scanner) { |
264 if (state_) | 247 if (state_) |
265 return false; | 248 return false; |
266 | 249 |
267 return ParseFieldSpecifics(scanner, | 250 size_t saved_cursor = scanner->SaveCursor(); |
Ilya Sherman
2015/11/26 02:25:09
Please add a comment above this block of code like
tfarina
2015/11/26 14:22:27
Done.
| |
268 UTF8ToUTF16(kStateRe), | 251 if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { |
269 kStateMatchType, | 252 scanner->RewindTo(saved_cursor); |
270 &state_); | 253 return false; |
254 } | |
255 | |
256 return ParseFieldSpecifics(scanner, kStateRe, kStateMatchType, &state_); | |
271 } | 257 } |
272 | 258 |
273 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { | 259 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { |
274 // Simple cases. | 260 // Simple cases. |
275 if (scanner->IsEnd()) | 261 if (scanner->IsEnd()) |
276 return false; | 262 return false; |
277 if (city_ && state_ && zip_) | 263 if (city_ && state_ && zip_) |
278 return false; | 264 return false; |
279 if (state_ && zip_) | 265 if (state_ && zip_) |
280 return ParseCity(scanner); | 266 return ParseCity(scanner); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
322 | 308 |
323 return false; | 309 return false; |
324 } | 310 } |
325 | 311 |
326 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( | 312 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( |
327 AutofillScanner* scanner) { | 313 AutofillScanner* scanner) { |
328 if (zip_) | 314 if (zip_) |
329 return RESULT_MATCH_NONE; | 315 return RESULT_MATCH_NONE; |
330 | 316 |
331 ParseNameLabelResult result = ParseNameAndLabelSeparately( | 317 ParseNameLabelResult result = ParseNameAndLabelSeparately( |
332 scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); | 318 scanner, kZipCodeRe, kZipCodeMatchType, &zip_); |
333 | 319 |
334 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) | 320 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) |
335 return result; | 321 return result; |
336 | 322 |
337 size_t saved_cursor = scanner->SaveCursor(); | 323 size_t saved_cursor = scanner->SaveCursor(); |
338 bool found_non_zip4 = ParseCity(scanner); | 324 bool found_non_zip4 = ParseCity(scanner); |
339 if (found_non_zip4) | 325 if (found_non_zip4) |
340 city_ = nullptr; | 326 city_ = nullptr; |
341 scanner->RewindTo(saved_cursor); | 327 scanner->RewindTo(saved_cursor); |
342 if (!found_non_zip4) { | 328 if (!found_non_zip4) { |
343 found_non_zip4 = ParseState(scanner); | 329 found_non_zip4 = ParseState(scanner); |
344 if (found_non_zip4) | 330 if (found_non_zip4) |
345 state_ = nullptr; | 331 state_ = nullptr; |
346 scanner->RewindTo(saved_cursor); | 332 scanner->RewindTo(saved_cursor); |
347 } | 333 } |
348 | 334 |
349 if (!found_non_zip4) { | 335 if (!found_non_zip4) { |
350 // Look for a zip+4, whose field name will also often contain | 336 // Look for a zip+4, whose field name will also often contain |
351 // the substring "zip". | 337 // the substring "zip". |
352 ParseFieldSpecifics(scanner, | 338 ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); |
353 UTF8ToUTF16(kZip4Re), | |
354 kZipCodeMatchType, | |
355 &zip4_); | |
356 } | 339 } |
357 return result; | 340 return result; |
358 } | 341 } |
359 | 342 |
360 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( | 343 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( |
361 AutofillScanner* scanner) { | 344 AutofillScanner* scanner) { |
362 if (city_) | 345 if (city_) |
363 return RESULT_MATCH_NONE; | 346 return RESULT_MATCH_NONE; |
364 | 347 |
365 return ParseNameAndLabelSeparately( | 348 return ParseNameAndLabelSeparately(scanner, kCityRe, kCityMatchType, &city_); |
366 scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); | |
367 } | 349 } |
368 | 350 |
369 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( | 351 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( |
370 AutofillScanner* scanner) { | 352 AutofillScanner* scanner) { |
371 if (state_) | 353 if (state_) |
372 return RESULT_MATCH_NONE; | 354 return RESULT_MATCH_NONE; |
373 | 355 |
374 return ParseNameAndLabelSeparately( | 356 size_t saved_cursor = scanner->SaveCursor(); |
375 scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); | 357 ParseNameLabelResult result = ParseNameAndLabelSeparately( |
358 scanner, "United States", kStateMatchType, nullptr); | |
359 | |
360 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) | |
361 return result; | |
362 scanner->RewindTo(saved_cursor); | |
Ilya Sherman
2015/11/26 02:25:09
This logic is not correct. I still think it shoul
tfarina
2015/11/26 14:22:27
Done.
| |
363 | |
364 return ParseNameAndLabelSeparately(scanner, kStateRe, kStateMatchType, | |
365 &state_); | |
376 } | 366 } |
377 | 367 |
378 } // namespace autofill | 368 } // namespace autofill |
OLD | NEW |