| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autofill/address_field.h" | 5 #include "chrome/browser/autofill/address_field.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "base/string16.h" | 11 #include "base/string16.h" |
| 12 #include "base/string_util.h" | 12 #include "base/string_util.h" |
| 13 #include "base/utf_string_conversions.h" | 13 #include "base/utf_string_conversions.h" |
| 14 #include "chrome/browser/autofill/autofill_ecml.h" | |
| 15 #include "chrome/browser/autofill/autofill_field.h" | 14 #include "chrome/browser/autofill/autofill_field.h" |
| 16 #include "chrome/browser/autofill/autofill_scanner.h" | 15 #include "chrome/browser/autofill/autofill_scanner.h" |
| 17 #include "grit/autofill_resources.h" | 16 #include "grit/autofill_resources.h" |
| 18 #include "ui/base/l10n/l10n_util.h" | 17 #include "ui/base/l10n/l10n_util.h" |
| 19 | 18 |
| 20 using autofill::GetEcmlPattern; | 19 FormField* AddressField::Parse(AutofillScanner* scanner) { |
| 21 | |
| 22 FormField* AddressField::Parse(AutofillScanner* scanner, bool is_ecml) { | |
| 23 if (scanner->IsEnd()) | 20 if (scanner->IsEnd()) |
| 24 return NULL; | 21 return NULL; |
| 25 | 22 |
| 26 scoped_ptr<AddressField> address_field(new AddressField); | 23 scoped_ptr<AddressField> address_field(new AddressField); |
| 27 const AutofillField* const initial_field = scanner->Cursor(); | 24 const AutofillField* const initial_field = scanner->Cursor(); |
| 28 size_t saved_cursor = scanner->SaveCursor(); | 25 size_t saved_cursor = scanner->SaveCursor(); |
| 29 | 26 |
| 30 string16 attention_ignored = | 27 string16 attention_ignored = |
| 31 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); | 28 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); |
| 32 string16 region_ignored = | 29 string16 region_ignored = |
| 33 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); | 30 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); |
| 34 | 31 |
| 35 // Allow address fields to appear in any order. | 32 // Allow address fields to appear in any order. |
| 36 size_t begin_trailing_non_labeled_fields = 0; | 33 size_t begin_trailing_non_labeled_fields = 0; |
| 37 bool has_trailing_non_labeled_fields = false; | 34 bool has_trailing_non_labeled_fields = false; |
| 38 while (!scanner->IsEnd()) { | 35 while (!scanner->IsEnd()) { |
| 39 const size_t cursor = scanner->SaveCursor(); | 36 const size_t cursor = scanner->SaveCursor(); |
| 40 if (ParseAddressLines(scanner, is_ecml, address_field.get()) || | 37 if (ParseAddressLines(scanner, address_field.get()) || |
| 41 ParseCity(scanner, is_ecml, address_field.get()) || | 38 ParseCity(scanner, address_field.get()) || |
| 42 ParseState(scanner, is_ecml, address_field.get()) || | 39 ParseState(scanner, address_field.get()) || |
| 43 ParseZipCode(scanner, is_ecml, address_field.get()) || | 40 ParseZipCode(scanner, address_field.get()) || |
| 44 ParseCountry(scanner, is_ecml, address_field.get()) || | 41 ParseCountry(scanner, address_field.get()) || |
| 45 ParseCompany(scanner, is_ecml, address_field.get())) { | 42 ParseCompany(scanner, address_field.get())) { |
| 46 has_trailing_non_labeled_fields = false; | 43 has_trailing_non_labeled_fields = false; |
| 47 continue; | 44 continue; |
| 48 } else if (ParseField(scanner, attention_ignored, NULL) || | 45 } else if (ParseField(scanner, attention_ignored, NULL) || |
| 49 ParseField(scanner, region_ignored, NULL)) { | 46 ParseField(scanner, region_ignored, NULL)) { |
| 50 // We ignore the following: | 47 // We ignore the following: |
| 51 // * Attention. | 48 // * Attention. |
| 52 // * Province/Region/Other. | 49 // * Province/Region/Other. |
| 53 continue; | 50 continue; |
| 54 } else if (scanner->Cursor() != initial_field && | 51 } else if (scanner->Cursor() != initial_field && |
| 55 ParseEmptyLabel(scanner, NULL)) { | 52 ParseEmptyLabel(scanner, NULL)) { |
| (...skipping 30 matching lines...) Expand all Loading... |
| 86 address_field->type_ = address_field->FindType(); | 83 address_field->type_ = address_field->FindType(); |
| 87 return address_field.release(); | 84 return address_field.release(); |
| 88 } | 85 } |
| 89 | 86 |
| 90 scanner->RewindTo(saved_cursor); | 87 scanner->RewindTo(saved_cursor); |
| 91 return NULL; | 88 return NULL; |
| 92 } | 89 } |
| 93 | 90 |
| 94 AddressType AddressField::FindType() const { | 91 AddressType AddressField::FindType() const { |
| 95 // First look at the field name, which itself will sometimes contain | 92 // First look at the field name, which itself will sometimes contain |
| 96 // "bill" or "ship". We could check for the ECML type prefixes | 93 // "bill" or "ship". |
| 97 // here, but there's no need to since ECML's prefixes Ecom_BillTo | |
| 98 // and Ecom_ShipTo contain "bill" and "ship" anyway. | |
| 99 if (company_) { | 94 if (company_) { |
| 100 string16 name = StringToLowerASCII(company_->name); | 95 string16 name = StringToLowerASCII(company_->name); |
| 101 return AddressTypeFromText(name); | 96 return AddressTypeFromText(name); |
| 102 } | 97 } |
| 103 if (address1_) { | 98 if (address1_) { |
| 104 string16 name = StringToLowerASCII(address1_->name); | 99 string16 name = StringToLowerASCII(address1_->name); |
| 105 return AddressTypeFromText(name); | 100 return AddressTypeFromText(name); |
| 106 } | 101 } |
| 107 if (address2_) { | 102 if (address2_) { |
| 108 string16 name = StringToLowerASCII(address2_->name); | 103 string16 name = StringToLowerASCII(address2_->name); |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 182 ok = ok && AddClassification(address2_, address_line2, map); | 177 ok = ok && AddClassification(address2_, address_line2, map); |
| 183 ok = ok && AddClassification(city_, address_city, map); | 178 ok = ok && AddClassification(city_, address_city, map); |
| 184 ok = ok && AddClassification(state_, address_state, map); | 179 ok = ok && AddClassification(state_, address_state, map); |
| 185 ok = ok && AddClassification(zip_, address_zip, map); | 180 ok = ok && AddClassification(zip_, address_zip, map); |
| 186 ok = ok && AddClassification(country_, address_country, map); | 181 ok = ok && AddClassification(country_, address_country, map); |
| 187 return ok; | 182 return ok; |
| 188 } | 183 } |
| 189 | 184 |
| 190 // static | 185 // static |
| 191 bool AddressField::ParseCompany(AutofillScanner* scanner, | 186 bool AddressField::ParseCompany(AutofillScanner* scanner, |
| 192 bool is_ecml, | |
| 193 AddressField* address_field) { | 187 AddressField* address_field) { |
| 194 if (address_field->company_ && !address_field->company_->IsEmpty()) | 188 if (address_field->company_ && !address_field->company_->IsEmpty()) |
| 195 return false; | 189 return false; |
| 196 | 190 |
| 197 string16 pattern; | 191 return ParseField(scanner, l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE), |
| 198 if (is_ecml) { | 192 &address_field->company_); |
| 199 pattern = GetEcmlPattern(kEcmlShipToCompanyName, | |
| 200 kEcmlBillToCompanyName, '|'); | |
| 201 } else { | |
| 202 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); | |
| 203 } | |
| 204 | |
| 205 return ParseField(scanner, pattern, &address_field->company_); | |
| 206 } | 193 } |
| 207 | 194 |
| 208 // static | 195 // static |
| 209 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | 196 bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
| 210 bool is_ecml, | |
| 211 AddressField* address_field) { | 197 AddressField* address_field) { |
| 212 // We only match the string "address" in page text, not in element names, | 198 // We only match the string "address" in page text, not in element names, |
| 213 // because sometimes every element in a group of address fields will have | 199 // because sometimes every element in a group of address fields will have |
| 214 // a name containing the string "address"; for example, on the page | 200 // a name containing the string "address"; for example, on the page |
| 215 // Kohl's - Register Billing Address.html the text element labeled "city" | 201 // Kohl's - Register Billing Address.html the text element labeled "city" |
| 216 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | 202 // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
| 217 // such as "address1", which appear as element names on various pages (eg | 203 // such as "address1", which appear as element names on various pages (eg |
| 218 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | 204 // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
| 219 // EBay Registration Enter Information.html). | 205 // EBay Registration Enter Information.html). |
| 220 if (address_field->address1_) | 206 if (address_field->address1_) |
| 221 return false; | 207 return false; |
| 222 | 208 |
| 223 string16 pattern; | 209 string16 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); |
| 224 if (is_ecml) { | 210 string16 label_pattern = |
| 225 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); | 211 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| 226 if (!ParseField(scanner, pattern, &address_field->address1_)) | |
| 227 return false; | |
| 228 } else { | |
| 229 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); | |
| 230 string16 label_pattern = | |
| 231 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | |
| 232 | 212 |
| 233 if (!ParseField(scanner, pattern, &address_field->address1_) && | 213 if (!ParseField(scanner, pattern, &address_field->address1_) && |
| 234 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | 214 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 235 &address_field->address1_)) { | 215 &address_field->address1_)) { |
| 236 return false; | 216 return false; |
| 237 } | |
| 238 } | 217 } |
| 239 | 218 |
| 240 // Optionally parse more address lines, which may have empty labels. | 219 // Optionally parse more address lines, which may have empty labels. |
| 241 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | 220 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
| 242 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | 221 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
| 243 if (is_ecml) { | 222 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); |
| 244 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); | 223 label_pattern = |
| 245 if (!ParseEmptyLabel(scanner, &address_field->address2_)) | 224 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| 246 ParseField(scanner, pattern, &address_field->address2_); | 225 if (!ParseEmptyLabel(scanner, &address_field->address2_) && |
| 247 } else { | 226 !ParseField(scanner, pattern, &address_field->address2_)) { |
| 248 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); | 227 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 249 string16 label_pattern = | 228 &address_field->address2_); |
| 250 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | |
| 251 if (!ParseEmptyLabel(scanner, &address_field->address2_) && | |
| 252 !ParseField(scanner, pattern, &address_field->address2_)) { | |
| 253 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
| 254 &address_field->address2_); | |
| 255 } | |
| 256 } | 229 } |
| 257 | 230 |
| 258 // Try for a third line, which we will promptly discard. | 231 // Try for a third line, which we will promptly discard. |
| 259 if (address_field->address2_ != NULL) { | 232 if (address_field->address2_ != NULL) { |
| 260 if (is_ecml) { | 233 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); |
| 261 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); | 234 ParseField(scanner, pattern, NULL); |
| 262 ParseField(scanner, pattern, NULL); | |
| 263 } else { | |
| 264 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); | |
| 265 ParseField(scanner, pattern, NULL); | |
| 266 } | |
| 267 } | 235 } |
| 268 | 236 |
| 269 return true; | 237 return true; |
| 270 } | 238 } |
| 271 | 239 |
| 272 // static | 240 // static |
| 273 bool AddressField::ParseCountry(AutofillScanner* scanner, | 241 bool AddressField::ParseCountry(AutofillScanner* scanner, |
| 274 bool is_ecml, | |
| 275 AddressField* address_field) { | 242 AddressField* address_field) { |
| 276 // Parse a country. The occasional page (e.g. | 243 // Parse a country. The occasional page (e.g. |
| 277 // Travelocity_New Member Information1.html) calls this a "location". | 244 // Travelocity_New Member Information1.html) calls this a "location". |
| 278 // Note: ECML standard uses 2 letter country code (ISO 3166) | |
| 279 if (address_field->country_ && !address_field->country_->IsEmpty()) | 245 if (address_field->country_ && !address_field->country_->IsEmpty()) |
| 280 return false; | 246 return false; |
| 281 | 247 |
| 282 string16 pattern; | 248 return ParseFieldSpecifics(scanner, |
| 283 if (is_ecml) | 249 l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE), |
| 284 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); | 250 MATCH_DEFAULT | MATCH_SELECT, |
| 285 else | |
| 286 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); | |
| 287 | |
| 288 return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, | |
| 289 &address_field->country_); | 251 &address_field->country_); |
| 290 } | 252 } |
| 291 | 253 |
| 292 // static | 254 // static |
| 293 bool AddressField::ParseZipCode(AutofillScanner* scanner, | 255 bool AddressField::ParseZipCode(AutofillScanner* scanner, |
| 294 bool is_ecml, | |
| 295 AddressField* address_field) { | 256 AddressField* address_field) { |
| 296 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | 257 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
| 297 // is called a "post code". | 258 // is called a "post code". |
| 298 // | 259 // |
| 299 // HACK: Just for the MapQuest driving directions page we match the | 260 // HACK: Just for the MapQuest driving directions page we match the |
| 300 // exact name "1z", which MapQuest uses to label its zip code field. | 261 // exact name "1z", which MapQuest uses to label its zip code field. |
| 301 // Hopefully before long we'll be smart enough to find the zip code | 262 // Hopefully before long we'll be smart enough to find the zip code |
| 302 // on that page automatically. | 263 // on that page automatically. |
| 303 if (address_field->zip_) | 264 if (address_field->zip_) |
| 304 return false; | 265 return false; |
| 305 | 266 |
| 306 string16 pattern; | 267 string16 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE); |
| 307 if (is_ecml) { | |
| 308 pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|'); | |
| 309 } else { | |
| 310 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE); | |
| 311 } | |
| 312 | |
| 313 AddressType tempType; | |
| 314 string16 name = scanner->Cursor()->name; | |
| 315 | |
| 316 // Note: comparisons using the ECML compliant name as a prefix must be used in | |
| 317 // order to accommodate Google Checkout. See |GetEcmlPattern| for more detail. | |
| 318 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); | |
| 319 if (StartsWith(name, bill_to_postal_code_field, false)) { | |
| 320 tempType = kBillingAddress; | |
| 321 } else if (StartsWith(name, bill_to_postal_code_field, false)) { | |
| 322 tempType = kShippingAddress; | |
| 323 } else { | |
| 324 tempType = kGenericAddress; | |
| 325 } | |
| 326 | |
| 327 if (!ParseField(scanner, pattern, &address_field->zip_)) | 268 if (!ParseField(scanner, pattern, &address_field->zip_)) |
| 328 return false; | 269 return false; |
| 329 | 270 |
| 330 address_field->type_ = tempType; | 271 address_field->type_ = kGenericAddress; |
| 331 if (!is_ecml) { | 272 // Look for a zip+4, whose field name will also often contain |
| 332 // Look for a zip+4, whose field name will also often contain | 273 // the substring "zip". |
| 333 // the substring "zip". | 274 ParseField(scanner, |
| 334 ParseField(scanner, | 275 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), |
| 335 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), | 276 &address_field->zip4_); |
| 336 &address_field->zip4_); | |
| 337 } | |
| 338 | 277 |
| 339 return true; | 278 return true; |
| 340 } | 279 } |
| 341 | 280 |
| 342 // static | 281 // static |
| 343 bool AddressField::ParseCity(AutofillScanner* scanner, | 282 bool AddressField::ParseCity(AutofillScanner* scanner, |
| 344 bool is_ecml, | |
| 345 AddressField* address_field) { | 283 AddressField* address_field) { |
| 346 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | 284 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
| 347 // the term "town". | 285 // the term "town". |
| 348 if (address_field->city_) | 286 if (address_field->city_) |
| 349 return false; | 287 return false; |
| 350 | 288 |
| 351 string16 pattern; | |
| 352 if (is_ecml) | |
| 353 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); | |
| 354 else | |
| 355 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); | |
| 356 | |
| 357 // Select fields are allowed here. This occurs on top-100 site rediff.com. | 289 // Select fields are allowed here. This occurs on top-100 site rediff.com. |
| 358 return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, | 290 return ParseFieldSpecifics(scanner, |
| 291 l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE), |
| 292 MATCH_DEFAULT | MATCH_SELECT, |
| 359 &address_field->city_); | 293 &address_field->city_); |
| 360 } | 294 } |
| 361 | 295 |
| 362 // static | 296 // static |
| 363 bool AddressField::ParseState(AutofillScanner* scanner, | 297 bool AddressField::ParseState(AutofillScanner* scanner, |
| 364 bool is_ecml, | |
| 365 AddressField* address_field) { | 298 AddressField* address_field) { |
| 366 if (address_field->state_) | 299 if (address_field->state_) |
| 367 return false; | 300 return false; |
| 368 | 301 |
| 369 string16 pattern; | 302 return ParseFieldSpecifics(scanner, |
| 370 if (is_ecml) | 303 l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE), |
| 371 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); | 304 MATCH_DEFAULT | MATCH_SELECT, |
| 372 else | |
| 373 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); | |
| 374 | |
| 375 return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, | |
| 376 &address_field->state_); | 305 &address_field->state_); |
| 377 } | 306 } |
| 378 | 307 |
| 379 AddressType AddressField::AddressTypeFromText(const string16 &text) { | 308 AddressType AddressField::AddressTypeFromText(const string16 &text) { |
| 380 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) | 309 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) |
| 381 != string16::npos || | 310 != string16::npos || |
| 382 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) | 311 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) |
| 383 != string16::npos) | 312 != string16::npos) |
| 384 // This text could be a checkbox label such as "same as my billing | 313 // This text could be a checkbox label such as "same as my billing |
| 385 // address" or "use my shipping address". | 314 // address" or "use my shipping address". |
| (...skipping 16 matching lines...) Expand all Loading... |
| 402 return kBillingAddress; | 331 return kBillingAddress; |
| 403 | 332 |
| 404 if (bill == string16::npos && ship != string16::npos) | 333 if (bill == string16::npos && ship != string16::npos) |
| 405 return kShippingAddress; | 334 return kShippingAddress; |
| 406 | 335 |
| 407 if (bill > ship) | 336 if (bill > ship) |
| 408 return kBillingAddress; | 337 return kBillingAddress; |
| 409 | 338 |
| 410 return kShippingAddress; | 339 return kShippingAddress; |
| 411 } | 340 } |
| OLD | NEW |