OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/autofill/core/browser/address_field.h" | 5 #include "components/autofill/core/browser/address_field.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "base/strings/string16.h" |
11 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
| 13 #include "base/strings/utf_string_conversions.h" |
12 #include "components/autofill/core/browser/autofill_field.h" | 14 #include "components/autofill/core/browser/autofill_field.h" |
13 #include "components/autofill/core/browser/autofill_regex_constants.h" | 15 #include "components/autofill/core/browser/autofill_regex_constants.h" |
14 #include "components/autofill/core/browser/autofill_scanner.h" | 16 #include "components/autofill/core/browser/autofill_scanner.h" |
15 #include "components/autofill/core/browser/field_types.h" | 17 #include "components/autofill/core/browser/field_types.h" |
16 | 18 |
| 19 using base::UTF8ToUTF16; |
| 20 |
17 namespace autofill { | 21 namespace autofill { |
18 | 22 |
19 namespace { | 23 namespace { |
20 | 24 |
21 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { | 25 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { |
22 *field = scanner->Cursor(); | 26 *field = scanner->Cursor(); |
23 scanner->Advance(); | 27 scanner->Advance(); |
24 return true; | 28 return true; |
25 } | 29 } |
26 | 30 |
(...skipping 10 matching lines...) Expand all Loading... |
37 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; | 41 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; |
38 | 42 |
39 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { | 43 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { |
40 if (scanner->IsEnd()) | 44 if (scanner->IsEnd()) |
41 return NULL; | 45 return NULL; |
42 | 46 |
43 scoped_ptr<AddressField> address_field(new AddressField); | 47 scoped_ptr<AddressField> address_field(new AddressField); |
44 const AutofillField* const initial_field = scanner->Cursor(); | 48 const AutofillField* const initial_field = scanner->Cursor(); |
45 size_t saved_cursor = scanner->SaveCursor(); | 49 size_t saved_cursor = scanner->SaveCursor(); |
46 | 50 |
| 51 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); |
| 52 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); |
| 53 |
47 // Allow address fields to appear in any order. | 54 // Allow address fields to appear in any order. |
48 size_t begin_trailing_non_labeled_fields = 0; | 55 size_t begin_trailing_non_labeled_fields = 0; |
49 bool has_trailing_non_labeled_fields = false; | 56 bool has_trailing_non_labeled_fields = false; |
50 while (!scanner->IsEnd()) { | 57 while (!scanner->IsEnd()) { |
51 const size_t cursor = scanner->SaveCursor(); | 58 const size_t cursor = scanner->SaveCursor(); |
52 if (address_field->ParseAddressLines(scanner) || | 59 if (address_field->ParseAddressLines(scanner) || |
53 address_field->ParseCityStateZipCode(scanner) || | 60 address_field->ParseCityStateZipCode(scanner) || |
54 address_field->ParseCountry(scanner) || | 61 address_field->ParseCountry(scanner) || |
55 address_field->ParseCompany(scanner)) { | 62 address_field->ParseCompany(scanner)) { |
56 has_trailing_non_labeled_fields = false; | 63 has_trailing_non_labeled_fields = false; |
57 continue; | 64 continue; |
58 } else if (ParseField(scanner, kAttentionIgnoredRe, NULL) || | 65 } else if (ParseField(scanner, attention_ignored, NULL) || |
59 ParseField(scanner, kRegionIgnoredRe, NULL)) { | 66 ParseField(scanner, region_ignored, NULL)) { |
60 // We ignore the following: | 67 // We ignore the following: |
61 // * Attention. | 68 // * Attention. |
62 // * Province/Region/Other. | 69 // * Province/Region/Other. |
63 continue; | 70 continue; |
64 } else if (scanner->Cursor() != initial_field && | 71 } else if (scanner->Cursor() != initial_field && |
65 ParseEmptyLabel(scanner, NULL)) { | 72 ParseEmptyLabel(scanner, NULL)) { |
66 // Ignore non-labeled fields within an address; the page | 73 // Ignore non-labeled fields within an address; the page |
67 // MapQuest Driving Directions North America.html contains such a field. | 74 // MapQuest Driving Directions North America.html contains such a field. |
68 // We only ignore such fields after we've parsed at least one other field; | 75 // We only ignore such fields after we've parsed at least one other field; |
69 // otherwise we'd effectively parse address fields before other field | 76 // otherwise we'd effectively parse address fields before other field |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
134 AddClassification(city_, ADDRESS_HOME_CITY, map) && | 141 AddClassification(city_, ADDRESS_HOME_CITY, map) && |
135 AddClassification(state_, ADDRESS_HOME_STATE, map) && | 142 AddClassification(state_, ADDRESS_HOME_STATE, map) && |
136 AddClassification(zip_, ADDRESS_HOME_ZIP, map) && | 143 AddClassification(zip_, ADDRESS_HOME_ZIP, map) && |
137 AddClassification(country_, ADDRESS_HOME_COUNTRY, map); | 144 AddClassification(country_, ADDRESS_HOME_COUNTRY, map); |
138 } | 145 } |
139 | 146 |
140 bool AddressField::ParseCompany(AutofillScanner* scanner) { | 147 bool AddressField::ParseCompany(AutofillScanner* scanner) { |
141 if (company_ && !company_->IsEmpty()) | 148 if (company_ && !company_->IsEmpty()) |
142 return false; | 149 return false; |
143 | 150 |
144 return ParseField(scanner, kCompanyRe, &company_); | 151 return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_); |
145 } | 152 } |
146 | 153 |
147 bool AddressField::ParseAddressLines(AutofillScanner* scanner) { | 154 bool AddressField::ParseAddressLines(AutofillScanner* scanner) { |
148 // We only match the string "address" in page text, not in element names, | 155 // We only match the string "address" in page text, not in element names, |
149 // because sometimes every element in a group of address fields will have | 156 // because sometimes every element in a group of address fields will have |
150 // a name containing the string "address"; for example, on the page | 157 // a name containing the string "address"; for example, on the page |
151 // Kohl's - Register Billing Address.html the text element labeled "city" | 158 // Kohl's - Register Billing Address.html the text element labeled "city" |
152 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | 159 // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
153 // such as "address1", which appear as element names on various pages (eg | 160 // such as "address1", which appear as element names on various pages (eg |
154 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | 161 // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
155 // EBay Registration Enter Information.html). | 162 // EBay Registration Enter Information.html). |
156 if (address1_ || street_address_) | 163 if (address1_ || street_address_) |
157 return false; | 164 return false; |
158 | 165 |
159 // Ignore "Address Lookup" field. http://crbug.com/427622 | 166 // Ignore "Address Lookup" field. http://crbug.com/427622 |
160 if (ParseField(scanner, kAddressLookupRe, NULL)) | 167 if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL)) |
161 return false; | 168 return false; |
162 | 169 |
163 if (!ParseFieldSpecifics(scanner, kAddressLine1Re, MATCH_DEFAULT, | 170 base::string16 pattern = UTF8ToUTF16(kAddressLine1Re); |
| 171 base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe); |
| 172 if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) && |
| 173 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
164 &address1_) && | 174 &address1_) && |
165 !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, | 175 !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA, |
166 MATCH_LABEL | MATCH_TEXT, &address1_) && | 176 &street_address_) && |
167 !ParseFieldSpecifics(scanner, kAddressLine1Re, | 177 !ParseFieldSpecifics(scanner, label_pattern, |
168 MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) && | 178 MATCH_LABEL | MATCH_TEXT_AREA, |
169 !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, | 179 &street_address_)) |
170 MATCH_LABEL | MATCH_TEXT_AREA, &street_address_)) | |
171 return false; | 180 return false; |
172 | 181 |
173 if (street_address_) | 182 if (street_address_) |
174 return true; | 183 return true; |
175 | 184 |
176 // This code may not pick up pages that have an address field consisting of a | 185 // This code may not pick up pages that have an address field consisting of a |
177 // sequence of unlabeled address fields. If we need to add this, see | 186 // sequence of unlabeled address fields. If we need to add this, see |
178 // discussion on https://codereview.chromium.org/741493003/ | 187 // discussion on https://codereview.chromium.org/741493003/ |
179 if (!ParseField(scanner, kAddressLine2Re, &address2_) && | 188 pattern = UTF8ToUTF16(kAddressLine2Re); |
180 !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, | 189 label_pattern = UTF8ToUTF16(kAddressLine2LabelRe); |
181 MATCH_LABEL | MATCH_TEXT, &address2_)) | 190 if (!ParseField(scanner, pattern, &address2_) && |
| 191 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 192 &address2_)) |
182 return true; | 193 return true; |
183 | 194 |
184 // Optionally parse address line 3. This uses the same label regexp as | 195 // Optionally parse address line 3. This uses the same label regexp as |
185 // address 2 above. | 196 // address 2 above. |
186 if (!ParseField(scanner, kAddressLinesExtraRe, &address3_) && | 197 pattern = UTF8ToUTF16(kAddressLinesExtraRe); |
187 !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, | 198 if (!ParseField(scanner, pattern, &address3_) && |
188 MATCH_LABEL | MATCH_TEXT, &address3_)) | 199 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 200 &address3_)) |
189 return true; | 201 return true; |
190 | 202 |
191 // Try for surplus lines, which we will promptly discard. Some pages have 4 | 203 // Try for surplus lines, which we will promptly discard. Some pages have 4 |
192 // address lines (e.g. uk/ShoesDirect2.html)! | 204 // address lines (e.g. uk/ShoesDirect2.html)! |
193 // | 205 // |
194 // Since these are rare, don't bother considering unlabeled lines as extra | 206 // Since these are rare, don't bother considering unlabeled lines as extra |
195 // address lines. | 207 // address lines. |
196 while (ParseField(scanner, kAddressLinesExtraRe, NULL)) { | 208 pattern = UTF8ToUTF16(kAddressLinesExtraRe); |
| 209 while (ParseField(scanner, pattern, NULL)) { |
197 // Consumed a surplus line, try for another. | 210 // Consumed a surplus line, try for another. |
198 } | 211 } |
199 return true; | 212 return true; |
200 } | 213 } |
201 | 214 |
202 bool AddressField::ParseCountry(AutofillScanner* scanner) { | 215 bool AddressField::ParseCountry(AutofillScanner* scanner) { |
203 if (country_ && !country_->IsEmpty()) | 216 if (country_ && !country_->IsEmpty()) |
204 return false; | 217 return false; |
205 | 218 |
206 scanner->SaveCursor(); | 219 scanner->SaveCursor(); |
207 if (ParseFieldSpecifics(scanner, kCountryRe, MATCH_DEFAULT | MATCH_SELECT, | 220 if (ParseFieldSpecifics(scanner, |
| 221 UTF8ToUTF16(kCountryRe), |
| 222 MATCH_DEFAULT | MATCH_SELECT, |
208 &country_)) { | 223 &country_)) { |
209 return true; | 224 return true; |
210 } | 225 } |
211 | 226 |
212 // The occasional page (e.g. google account registration page) calls this a | 227 // The occasional page (e.g. google account registration page) calls this a |
213 // "location". However, this only makes sense for select tags. | 228 // "location". However, this only makes sense for select tags. |
214 scanner->Rewind(); | 229 scanner->Rewind(); |
215 return ParseFieldSpecifics(scanner, kCountryLocationRe, | 230 return ParseFieldSpecifics(scanner, |
| 231 UTF8ToUTF16(kCountryLocationRe), |
216 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, | 232 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, |
217 &country_); | 233 &country_); |
218 } | 234 } |
219 | 235 |
220 bool AddressField::ParseZipCode(AutofillScanner* scanner) { | 236 bool AddressField::ParseZipCode(AutofillScanner* scanner) { |
221 if (zip_) | 237 if (zip_) |
222 return false; | 238 return false; |
223 | 239 |
224 if (!ParseFieldSpecifics(scanner, kZipCodeRe, kZipCodeMatchType, &zip_)) { | 240 if (!ParseFieldSpecifics(scanner, |
| 241 UTF8ToUTF16(kZipCodeRe), |
| 242 kZipCodeMatchType, |
| 243 &zip_)) { |
225 return false; | 244 return false; |
226 } | 245 } |
227 | 246 |
228 // Look for a zip+4, whose field name will also often contain | 247 // Look for a zip+4, whose field name will also often contain |
229 // the substring "zip". | 248 // the substring "zip". |
230 ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); | 249 ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_); |
231 return true; | 250 return true; |
232 } | 251 } |
233 | 252 |
234 bool AddressField::ParseCity(AutofillScanner* scanner) { | 253 bool AddressField::ParseCity(AutofillScanner* scanner) { |
235 if (city_) | 254 if (city_) |
236 return false; | 255 return false; |
237 | 256 |
238 return ParseFieldSpecifics(scanner, kCityRe, kCityMatchType, &city_); | 257 return ParseFieldSpecifics(scanner, |
| 258 UTF8ToUTF16(kCityRe), |
| 259 kCityMatchType, |
| 260 &city_); |
239 } | 261 } |
240 | 262 |
241 bool AddressField::ParseState(AutofillScanner* scanner) { | 263 bool AddressField::ParseState(AutofillScanner* scanner) { |
242 if (state_) | 264 if (state_) |
243 return false; | 265 return false; |
244 | 266 |
245 // Ignore spurious matches for "United States". | 267 return ParseFieldSpecifics(scanner, |
246 size_t saved_cursor = scanner->SaveCursor(); | 268 UTF8ToUTF16(kStateRe), |
247 if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { | 269 kStateMatchType, |
248 scanner->RewindTo(saved_cursor); | 270 &state_); |
249 return false; | |
250 } | |
251 | |
252 return ParseFieldSpecifics(scanner, kStateRe, kStateMatchType, &state_); | |
253 } | 271 } |
254 | 272 |
255 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { | 273 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { |
256 // Simple cases. | 274 // Simple cases. |
257 if (scanner->IsEnd()) | 275 if (scanner->IsEnd()) |
258 return false; | 276 return false; |
259 if (city_ && state_ && zip_) | 277 if (city_ && state_ && zip_) |
260 return false; | 278 return false; |
261 if (state_ && zip_) | 279 if (state_ && zip_) |
262 return ParseCity(scanner); | 280 return ParseCity(scanner); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
304 | 322 |
305 return false; | 323 return false; |
306 } | 324 } |
307 | 325 |
308 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( | 326 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( |
309 AutofillScanner* scanner) { | 327 AutofillScanner* scanner) { |
310 if (zip_) | 328 if (zip_) |
311 return RESULT_MATCH_NONE; | 329 return RESULT_MATCH_NONE; |
312 | 330 |
313 ParseNameLabelResult result = ParseNameAndLabelSeparately( | 331 ParseNameLabelResult result = ParseNameAndLabelSeparately( |
314 scanner, kZipCodeRe, kZipCodeMatchType, &zip_); | 332 scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_); |
315 | 333 |
316 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) | 334 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) |
317 return result; | 335 return result; |
318 | 336 |
319 size_t saved_cursor = scanner->SaveCursor(); | 337 size_t saved_cursor = scanner->SaveCursor(); |
320 bool found_non_zip4 = ParseCity(scanner); | 338 bool found_non_zip4 = ParseCity(scanner); |
321 if (found_non_zip4) | 339 if (found_non_zip4) |
322 city_ = nullptr; | 340 city_ = nullptr; |
323 scanner->RewindTo(saved_cursor); | 341 scanner->RewindTo(saved_cursor); |
324 if (!found_non_zip4) { | 342 if (!found_non_zip4) { |
325 found_non_zip4 = ParseState(scanner); | 343 found_non_zip4 = ParseState(scanner); |
326 if (found_non_zip4) | 344 if (found_non_zip4) |
327 state_ = nullptr; | 345 state_ = nullptr; |
328 scanner->RewindTo(saved_cursor); | 346 scanner->RewindTo(saved_cursor); |
329 } | 347 } |
330 | 348 |
331 if (!found_non_zip4) { | 349 if (!found_non_zip4) { |
332 // Look for a zip+4, whose field name will also often contain | 350 // Look for a zip+4, whose field name will also often contain |
333 // the substring "zip". | 351 // the substring "zip". |
334 ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); | 352 ParseFieldSpecifics(scanner, |
| 353 UTF8ToUTF16(kZip4Re), |
| 354 kZipCodeMatchType, |
| 355 &zip4_); |
335 } | 356 } |
336 return result; | 357 return result; |
337 } | 358 } |
338 | 359 |
339 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( | 360 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( |
340 AutofillScanner* scanner) { | 361 AutofillScanner* scanner) { |
341 if (city_) | 362 if (city_) |
342 return RESULT_MATCH_NONE; | 363 return RESULT_MATCH_NONE; |
343 | 364 |
344 return ParseNameAndLabelSeparately(scanner, kCityRe, kCityMatchType, &city_); | 365 return ParseNameAndLabelSeparately( |
| 366 scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_); |
345 } | 367 } |
346 | 368 |
347 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( | 369 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( |
348 AutofillScanner* scanner) { | 370 AutofillScanner* scanner) { |
349 if (state_) | 371 if (state_) |
350 return RESULT_MATCH_NONE; | 372 return RESULT_MATCH_NONE; |
351 | 373 |
352 size_t saved_cursor = scanner->SaveCursor(); | 374 return ParseNameAndLabelSeparately( |
353 if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { | 375 scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_); |
354 scanner->RewindTo(saved_cursor); | |
355 return RESULT_MATCH_NONE; | |
356 } | |
357 | |
358 return ParseNameAndLabelSeparately(scanner, kStateRe, kStateMatchType, | |
359 &state_); | |
360 } | 376 } |
361 | 377 |
362 } // namespace autofill | 378 } // namespace autofill |
OLD | NEW |