OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autofill/address_field.h" | 5 #include "chrome/browser/autofill/address_field.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
11 #include "base/string16.h" | 11 #include "base/string16.h" |
12 #include "base/string_util.h" | 12 #include "base/string_util.h" |
13 #include "base/utf_string_conversions.h" | 13 #include "base/utf_string_conversions.h" |
14 #include "chrome/browser/autofill/autofill_ecml.h" | |
15 #include "chrome/browser/autofill/autofill_field.h" | 14 #include "chrome/browser/autofill/autofill_field.h" |
16 #include "chrome/browser/autofill/autofill_scanner.h" | 15 #include "chrome/browser/autofill/autofill_scanner.h" |
17 #include "grit/autofill_resources.h" | 16 #include "grit/autofill_resources.h" |
18 #include "ui/base/l10n/l10n_util.h" | 17 #include "ui/base/l10n/l10n_util.h" |
19 | 18 |
20 using autofill::GetEcmlPattern; | 19 FormField* AddressField::Parse(AutofillScanner* scanner) { |
21 | |
22 FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { | |
23 if (scanner->IsEnd()) | 20 if (scanner->IsEnd()) |
24 return NULL; | 21 return NULL; |
25 | 22 |
26 scoped_ptr<AddressField> address_field(new AddressField); | 23 scoped_ptr<AddressField> address_field(new AddressField); |
27 const AutofillField* const initial_field = scanner->Cursor(); | 24 const AutofillField* const initial_field = scanner->Cursor(); |
28 size_t saved_cursor = scanner->SaveCursor(); | 25 size_t saved_cursor = scanner->SaveCursor(); |
29 | 26 |
30 string16 attention_ignored = | 27 string16 attention_ignored = |
31 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); | 28 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); |
32 string16 region_ignored = | 29 string16 region_ignored = |
33 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); | 30 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); |
34 | 31 |
35 // Allow address fields to appear in any order. | 32 // Allow address fields to appear in any order. |
36 size_t begin_trailing_non_labeled_fields = 0; | 33 size_t begin_trailing_non_labeled_fields = 0; |
37 bool has_trailing_non_labeled_fields = false; | 34 bool has_trailing_non_labeled_fields = false; |
38 while (!scanner->IsEnd()) { | 35 while (!scanner->IsEnd()) { |
39 const size_t cursor = scanner->SaveCursor(); | 36 const size_t cursor = scanner->SaveCursor(); |
40 if (ParseAddressLines(scanner, is_ecml, address_field.get()) || | 37 if (ParseAddressLines(scanner, address_field.get()) || |
41 ParseCity(scanner, is_ecml, address_field.get()) || | 38 ParseCity(scanner, address_field.get()) || |
42 ParseState(scanner, is_ecml, address_field.get()) || | 39 ParseState(scanner, address_field.get()) || |
43 ParseZipCode(scanner, is_ecml, address_field.get()) || | 40 ParseZipCode(scanner, address_field.get()) || |
44 ParseCountry(scanner, is_ecml, address_field.get()) || | 41 ParseCountry(scanner, address_field.get()) || |
45 ParseCompany(scanner, is_ecml, address_field.get())) { | 42 ParseCompany(scanner, address_field.get())) { |
46 has_trailing_non_labeled_fields = false; | 43 has_trailing_non_labeled_fields = false; |
47 continue; | 44 continue; |
48 } else if (ParseField(scanner, attention_ignored, NULL) || | 45 } else if (ParseField(scanner, attention_ignored, NULL) || |
49 ParseField(scanner, region_ignored, NULL)) { | 46 ParseField(scanner, region_ignored, NULL)) { |
50 // We ignore the following: | 47 // We ignore the following: |
51 // * Attention. | 48 // * Attention. |
52 // * Province/Region/Other. | 49 // * Province/Region/Other. |
53 continue; | 50 continue; |
54 } else if (scanner->Cursor() != initial_field && | 51 } else if (scanner->Cursor() != initial_field && |
55 ParseEmptyLabel(scanner, NULL)) { | 52 ParseEmptyLabel(scanner, NULL)) { |
(...skipping 30 matching lines...)
86 address_field->type_ = address_field->FindType(); | 83 address_field->type_ = address_field->FindType(); |
87 return address_field.release(); | 84 return address_field.release(); |
88 } | 85 } |
89 | 86 |
90 scanner->RewindTo(saved_cursor); | 87 scanner->RewindTo(saved_cursor); |
91 return NULL; | 88 return NULL; |
92 } | 89 } |
93 | 90 |
94 AddressType AddressField::FindType() const { | 91 AddressType AddressField::FindType() const { |
95 // First look at the field name, which itself will sometimes contain | 92 // First look at the field name, which itself will sometimes contain |
96 // "bill" or "ship". We could check for the ECML type prefixes | 93 // "bill" or "ship". |
97 // here, but there's no need to since ECML's prefixes Ecom_BillTo | |
98 // and Ecom_ShipTo contain "bill" and "ship" anyway. | |
99 if (company_) { | 94 if (company_) { |
100 string16 name = StringToLowerASCII(company_->name); | 95 string16 name = StringToLowerASCII(company_->name); |
101 return AddressTypeFromText(name); | 96 return AddressTypeFromText(name); |
102 } | 97 } |
103 if (address1_) { | 98 if (address1_) { |
104 string16 name = StringToLowerASCII(address1_->name); | 99 string16 name = StringToLowerASCII(address1_->name); |
105 return AddressTypeFromText(name); | 100 return AddressTypeFromText(name); |
106 } | 101 } |
107 if (address2_) { | 102 if (address2_) { |
108 string16 name = StringToLowerASCII(address2_->name); | 103 string16 name = StringToLowerASCII(address2_->name); |
(...skipping 73 matching lines...)
182 ok = ok && AddClassification(address2_, address_line2, map); | 177 ok = ok && AddClassification(address2_, address_line2, map); |
183 ok = ok && AddClassification(city_, address_city, map); | 178 ok = ok && AddClassification(city_, address_city, map); |
184 ok = ok && AddClassification(state_, address_state, map); | 179 ok = ok && AddClassification(state_, address_state, map); |
185 ok = ok && AddClassification(zip_, address_zip, map); | 180 ok = ok && AddClassification(zip_, address_zip, map); |
186 ok = ok && AddClassification(country_, address_country, map); | 181 ok = ok && AddClassification(country_, address_country, map); |
187 return ok; | 182 return ok; |
188 } | 183 } |
189 | 184 |
190 // static | 185 // static |
191 bool AddressField::ParseCompany(AutofillScanner* scanner, | 186 bool AddressField::ParseCompany(AutofillScanner* scanner, |
192 bool is_ecml, | |
193 AddressField* address_field) { | 187 AddressField* address_field) { |
194 if (address_field->company_ && !address_field->company_->IsEmpty()) | 188 if (address_field->company_ && !address_field->company_->IsEmpty()) |
195 return false; | 189 return false; |
196 | 190 |
197 string16 pattern; | 191 return ParseField(scanner, l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE), |
198 if (is_ecml) { | 192 &address_field->company_); |
199 pattern = GetEcmlPattern(kEcmlShipToCompanyName, | |
200 kEcmlBillToCompanyName, '|'); | |
201 } else { | |
202 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); | |
203 } | |
204 | |
205 return ParseField(scanner, pattern, &address_field->company_); | |
206 } | 193 } |
207 | 194 |
208 // static | 195 // static |
209 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | 196 bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
210 bool is_ecml, | |
211 AddressField* address_field) { | 197 AddressField* address_field) { |
212 // We only match the string "address" in page text, not in element names, | 198 // We only match the string "address" in page text, not in element names, |
213 // because sometimes every element in a group of address fields will have | 199 // because sometimes every element in a group of address fields will have |
214 // a name containing the string "address"; for example, on the page | 200 // a name containing the string "address"; for example, on the page |
215 // Kohl's - Register Billing Address.html the text element labeled "city" | 201 // Kohl's - Register Billing Address.html the text element labeled "city" |
216 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | 202 // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
217 // such as "address1", which appear as element names on various pages (eg | 203 // such as "address1", which appear as element names on various pages (eg |
218 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | 204 // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
219 // EBay Registration Enter Information.html). | 205 // EBay Registration Enter Information.html). |
220 if (address_field->address1_) | 206 if (address_field->address1_) |
221 return false; | 207 return false; |
222 | 208 |
223 string16 pattern; | 209 string16 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); |
224 if (is_ecml) { | 210 string16 label_pattern = |
225 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); | 211 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
226 if (!ParseField(scanner, pattern, &address_field->address1_)) | |
227 return false; | |
228 } else { | |
229 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); | |
230 string16 label_pattern = | |
231 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | |
232 | 212 |
233 if (!ParseField(scanner, pattern, &address_field->address1_) && | 213 if (!ParseField(scanner, pattern, &address_field->address1_) && |
234 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | 214 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
235 &address_field->address1_)) { | 215 &address_field->address1_)) { |
236 return false; | 216 return false; |
237 } | |
238 } | 217 } |
239 | 218 |
240 // Optionally parse more address lines, which may have empty labels. | 219 // Optionally parse more address lines, which may have empty labels. |
241 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | 220 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
242 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | 221 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
243 if (is_ecml) { | 222 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); |
244 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); | 223 label_pattern = |
245 if (!ParseEmptyLabel(scanner, &address_field->address2_)) | 224 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
246 ParseField(scanner, pattern, &address_field->address2_); | 225 if (!ParseEmptyLabel(scanner, &address_field->address2_) && |
247 } else { | 226 !ParseField(scanner, pattern, &address_field->address2_)) { |
248 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); | 227 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
249 string16 label_pattern = | 228 &address_field->address2_); |
250 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | |
251 if (!ParseEmptyLabel(scanner, &address_field->address2_) && | |
252 !ParseField(scanner, pattern, &address_field->address2_)) { | |
253 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
254 &address_field->address2_); | |
255 } | |
256 } | 229 } |
257 | 230 |
258 // Try for a third line, which we will promptly discard. | 231 // Try for a third line, which we will promptly discard. |
259 if (address_field->address2_ != NULL) { | 232 if (address_field->address2_ != NULL) { |
260 if (is_ecml) { | 233 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); |
261 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); | 234 ParseField(scanner, pattern, NULL); |
262 ParseField(scanner, pattern, NULL); | |
263 } else { | |
264 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); | |
265 ParseField(scanner, pattern, NULL); | |
266 } | |
267 } | 235 } |
268 | 236 |
269 return true; | 237 return true; |
270 } | 238 } |
271 | 239 |
272 // static | 240 // static |
273 bool AddressField::ParseCountry(AutofillScanner* scanner, | 241 bool AddressField::ParseCountry(AutofillScanner* scanner, |
274 bool is_ecml, | |
275 AddressField* address_field) { | 242 AddressField* address_field) { |
276 // Parse a country. The occasional page (e.g. | 243 // Parse a country. The occasional page (e.g. |
277 // Travelocity_New Member Information1.html) calls this a "location". | 244 // Travelocity_New Member Information1.html) calls this a "location". |
278 // Note: ECML standard uses 2 letter country code (ISO 3166) | |
279 if (address_field->country_ && !address_field->country_->IsEmpty()) | 245 if (address_field->country_ && !address_field->country_->IsEmpty()) |
280 return false; | 246 return false; |
281 | 247 |
282 string16 pattern; | 248 return ParseFieldSpecifics(scanner, |
283 if (is_ecml) | 249 l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE), |
284 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); | 250 MATCH_DEFAULT | MATCH_SELECT, |
285 else | |
286 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); | |
287 | |
288 return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, | |
289 &address_field->country_); | 251 &address_field->country_); |
290 } | 252 } |
291 | 253 |
292 // static | 254 // static |
293 bool AddressField::ParseZipCode(AutofillScanner* scanner, | 255 bool AddressField::ParseZipCode(AutofillScanner* scanner, |
294 bool is_ecml, | |
295 AddressField* address_field) { | 256 AddressField* address_field) { |
296 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | 257 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
297 // is called a "post code". | 258 // is called a "post code". |
298 // | 259 // |
299 // HACK: Just for the MapQuest driving directions page we match the | 260 // HACK: Just for the MapQuest driving directions page we match the |
300 // exact name "1z", which MapQuest uses to label its zip code field. | 261 // exact name "1z", which MapQuest uses to label its zip code field. |
301 // Hopefully before long we'll be smart enough to find the zip code | 262 // Hopefully before long we'll be smart enough to find the zip code |
302 // on that page automatically. | 263 // on that page automatically. |
303 if (address_field->zip_) | 264 if (address_field->zip_) |
304 return false; | 265 return false; |
305 | 266 |
306 string16 pattern; | 267 string16 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE); |
307 if (is_ecml) { | |
308 pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|'); | |
309 } else { | |
310 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE); | |
311 } | |
312 | |
313 AddressType tempType; | |
314 string16 name = scanner->Cursor()->name; | |
315 | |
316 // Note: comparisons using the ECML compliant name as a prefix must be used in | |
317 // order to accommodate Google Checkout. See |GetEcmlPattern| for more detail. | |
318 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); | |
319 if (StartsWith(name, bill_to_postal_code_field, false)) { | |
320 tempType = kBillingAddress; | |
321 } else if (StartsWith(name, bill_to_postal_code_field, false)) { | |
322 tempType = kShippingAddress; | |
323 } else { | |
324 tempType = kGenericAddress; | |
325 } | |
326 | |
327 if (!ParseField(scanner, pattern, &address_field->zip_)) | 268 if (!ParseField(scanner, pattern, &address_field->zip_)) |
328 return false; | 269 return false; |
329 | 270 |
330 address_field->type_ = tempType; | 271 address_field->type_ = kGenericAddress; |
331 if (!is_ecml) { | 272 // Look for a zip+4, whose field name will also often contain |
332 // Look for a zip+4, whose field name will also often contain | 273 // the substring "zip". |
333 // the substring "zip". | 274 ParseField(scanner, |
334 ParseField(scanner, | 275 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), |
335 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), | 276 &address_field->zip4_); |
336 &address_field->zip4_); | |
337 } | |
338 | 277 |
339 return true; | 278 return true; |
340 } | 279 } |
341 | 280 |
342 // static | 281 // static |
343 bool AddressField::ParseCity(AutofillScanner* scanner, | 282 bool AddressField::ParseCity(AutofillScanner* scanner, |
344 bool is_ecml, | |
345 AddressField* address_field) { | 283 AddressField* address_field) { |
346 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | 284 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
347 // the term "town". | 285 // the term "town". |
348 if (address_field->city_) | 286 if (address_field->city_) |
349 return false; | 287 return false; |
350 | 288 |
351 string16 pattern; | |
352 if (is_ecml) | |
353 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); | |
354 else | |
355 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); | |
356 | |
357 // Select fields are allowed here. This occurs on top-100 site rediff.com. | 289 // Select fields are allowed here. This occurs on top-100 site rediff.com. |
358 return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, | 290 return ParseFieldSpecifics(scanner, |
| 291 l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE), |
| 292 MATCH_DEFAULT | MATCH_SELECT, |
359 &address_field->city_); | 293 &address_field->city_); |
360 } | 294 } |
361 | 295 |
362 // static | 296 // static |
363 bool AddressField::ParseState(AutofillScanner* scanner, | 297 bool AddressField::ParseState(AutofillScanner* scanner, |
364 bool is_ecml, | |
365 AddressField* address_field) { | 298 AddressField* address_field) { |
366 if (address_field->state_) | 299 if (address_field->state_) |
367 return false; | 300 return false; |
368 | 301 |
369 string16 pattern; | 302 return ParseFieldSpecifics(scanner, |
370 if (is_ecml) | 303 l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE), |
371 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); | 304 MATCH_DEFAULT | MATCH_SELECT, |
372 else | |
373 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); | |
374 | |
375 return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, | |
376 &address_field->state_); | 305 &address_field->state_); |
377 } | 306 } |
378 | 307 |
379 AddressType AddressField::AddressTypeFromText(const string16 &text) { | 308 AddressType AddressField::AddressTypeFromText(const string16 &text) { |
380 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) | 309 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) |
381 != string16::npos || | 310 != string16::npos || |
382 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) | 311 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) |
383 != string16::npos) | 312 != string16::npos) |
384 // This text could be a checkbox label such as "same as my billing | 313 // This text could be a checkbox label such as "same as my billing |
385 // address" or "use my shipping address". | 314 // address" or "use my shipping address". |
(...skipping 16 matching lines...)
402 return kBillingAddress; | 331 return kBillingAddress; |
403 | 332 |
404 if (bill == string16::npos && ship != string16::npos) | 333 if (bill == string16::npos && ship != string16::npos) |
405 return kShippingAddress; | 334 return kShippingAddress; |
406 | 335 |
407 if (bill > ship) | 336 if (bill > ship) |
408 return kBillingAddress; | 337 return kBillingAddress; |
409 | 338 |
410 return kShippingAddress; | 339 return kShippingAddress; |
411 } | 340 } |
OLD | NEW |