| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autofill/address_field.h" | 5 #include "chrome/browser/autofill/address_field.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "base/string16.h" | 11 #include "base/string16.h" |
| 12 #include "base/string_util.h" | 12 #include "base/string_util.h" |
| 13 #include "base/utf_string_conversions.h" | 13 #include "base/utf_string_conversions.h" |
| 14 #include "chrome/browser/autofill/autofill_field.h" | 14 #include "chrome/browser/autofill/autofill_field.h" |
| 15 #include "chrome/browser/autofill/autofill_scanner.h" | 15 #include "chrome/browser/autofill/autofill_scanner.h" |
| 16 #include "chrome/browser/autofill/email_field.h" |
| 17 #include "chrome/browser/autofill/phone_field.h" |
| 16 #include "grit/autofill_resources.h" | 18 #include "grit/autofill_resources.h" |
| 17 #include "ui/base/l10n/l10n_util.h" | 19 #include "ui/base/l10n/l10n_util.h" |
| 18 | 20 |
| 19 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { | 21 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { |
| 20 AutofillFieldType address_company; | 22 AutofillFieldType address_company; |
| 21 AutofillFieldType address_line1; | 23 AutofillFieldType address_line1; |
| 22 AutofillFieldType address_line2; | 24 AutofillFieldType address_line2; |
| 23 AutofillFieldType address_city; | 25 AutofillFieldType address_city; |
| 24 AutofillFieldType address_state; | 26 AutofillFieldType address_state; |
| 25 AutofillFieldType address_zip; | 27 AutofillFieldType address_zip; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 71 const AutofillField* initial_field = scanner->Cursor(); | 73 const AutofillField* initial_field = scanner->Cursor(); |
| 72 scanner->SaveCursor(); | 74 scanner->SaveCursor(); |
| 73 | 75 |
| 74 string16 attention_ignored = | 76 string16 attention_ignored = |
| 75 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); | 77 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); |
| 76 string16 region_ignored = | 78 string16 region_ignored = |
| 77 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); | 79 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); |
| 78 | 80 |
| 79 // Allow address fields to appear in any order. | 81 // Allow address fields to appear in any order. |
| 80 while (!scanner->IsEnd()) { | 82 while (!scanner->IsEnd()) { |
| 83 // On each iteration, first probe whether the current field is an email or |
| 84 // phone field. If it is, stop parsing the address here, because email and |
| 85 // phone take priority over address. Each probe is followed by a rewind of |
| 86 // |scanner| so that the probe itself does not consume any fields. |
| 87 size_t cursor_position = scanner->SaveCursor(); |
| 88 bool is_email = EmailField::Parse(scanner, is_ecml); |
| 89 scanner->RewindTo(cursor_position); |
| 90 if (is_email) |
| 91 break; |
| 92 |
| 93 cursor_position = scanner->SaveCursor(); |
| 94 bool is_phone = PhoneField::Parse(scanner, is_ecml); |
| 95 scanner->RewindTo(cursor_position); |
| 96 if (is_phone) |
| 97 break; |
| 98 |
| 81 if (ParseCompany(scanner, is_ecml, address_field.get()) || | 99 if (ParseCompany(scanner, is_ecml, address_field.get()) || |
| 82 ParseAddressLines(scanner, is_ecml, address_field.get()) || | 100 ParseAddressLines(scanner, is_ecml, address_field.get()) || |
| 83 ParseCity(scanner, is_ecml, address_field.get()) || | 101 ParseCity(scanner, is_ecml, address_field.get()) || |
| 84 ParseState(scanner, is_ecml, address_field.get()) || | 102 ParseState(scanner, is_ecml, address_field.get()) || |
| 85 ParseZipCode(scanner, is_ecml, address_field.get()) || | 103 ParseZipCode(scanner, is_ecml, address_field.get()) || |
| 86 ParseCountry(scanner, is_ecml, address_field.get())) { | 104 ParseCountry(scanner, is_ecml, address_field.get())) { |
| 87 continue; | 105 continue; |
| 88 } else if (ParseText(scanner, attention_ignored) || | 106 } else if (ParseText(scanner, attention_ignored, |
| 89 ParseText(scanner, region_ignored)) { | 107 MATCH_NAME | MATCH_LABEL | MATCH_TEXT) || |
| 108 ParseText(scanner, region_ignored, |
| 109 MATCH_NAME | MATCH_LABEL | MATCH_TEXT)) { |
| 90 // We ignore the following: | 110 // We ignore the following: |
| 91 // * Attention. | 111 // * Attention. |
| 92 // * Province/Region/Other. | 112 // * Province/Region/Other. |
| 93 continue; | 113 continue; |
| 94 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) { | 114 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) { |
| 95 // Ignore non-labeled fields within an address; the page | 115 // Ignore non-labeled fields within an address; the page |
| 96 // MapQuest Driving Directions North America.html contains such a field. | 116 // MapQuest Driving Directions North America.html contains such a field. |
| 97 // We only ignore such fields after we've parsed at least one other field; | 117 // We only ignore such fields after we've parsed at least one other field; |
| 98 // otherwise we'd effectively parse address fields before other field | 118 // otherwise we'd effectively parse address fields before other field |
| 99 // types after any non-labeled fields, and we want email address fields to | 119 // types after any non-labeled fields, and we want email address fields to |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 159 return false; | 179 return false; |
| 160 | 180 |
| 161 string16 pattern; | 181 string16 pattern; |
| 162 if (is_ecml) { | 182 if (is_ecml) { |
| 163 pattern = GetEcmlPattern(kEcmlShipToCompanyName, | 183 pattern = GetEcmlPattern(kEcmlShipToCompanyName, |
| 164 kEcmlBillToCompanyName, '|'); | 184 kEcmlBillToCompanyName, '|'); |
| 165 } else { | 185 } else { |
| 166 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); | 186 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); |
| 167 } | 187 } |
| 168 | 188 |
| 169 return ParseText(scanner, pattern, &address_field->company_); | 189 return ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 190 &address_field->company_); |
| 170 } | 191 } |
| 171 | 192 |
| 172 // static | 193 // static |
| 173 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | 194 bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
| 174 bool is_ecml, | 195 bool is_ecml, |
| 175 AddressField* address_field) { | 196 AddressField* address_field) { |
| 176 // We only match the string "address" in page text, not in element names, | 197 // We only match the string "address" in page text, not in element names, |
| 177 // because sometimes every element in a group of address fields will have | 198 // because sometimes every element in a group of address fields will have |
| 178 // a name containing the string "address"; for example, on the page | 199 // a name containing the string "address"; for example, on the page |
| 179 // Kohl's - Register Billing Address.html the text element labeled "city" | 200 // Kohl's - Register Billing Address.html the text element labeled "city" |
| 180 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | 201 // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
| 181 // such as "address1", which appear as element names on various pages (eg | 202 // such as "address1", which appear as element names on various pages (eg |
| 182 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | 203 // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
| 183 // EBay Registration Enter Information.html). | 204 // EBay Registration Enter Information.html). |
| 184 if (address_field->address1_) | 205 if (address_field->address1_) |
| 185 return false; | 206 return false; |
| 186 | 207 |
| 187 string16 pattern; | 208 string16 pattern; |
| 188 if (is_ecml) { | 209 if (is_ecml) { |
| 189 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); | 210 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); |
| 190 if (!ParseText(scanner, pattern, &address_field->address1_)) | 211 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 212 &address_field->address1_)) |
| 191 return false; | 213 return false; |
| 192 } else { | 214 } else { |
| 193 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); | 215 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); |
| 194 string16 label_pattern = | 216 string16 label_pattern = |
| 195 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | 217 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| 196 | 218 |
| 197 if (!ParseText(scanner, pattern, &address_field->address1_) && | 219 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 198 !ParseLabelText(scanner, label_pattern, &address_field->address1_)) | 220 &address_field->address1_) && |
| 221 !ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 222 &address_field->address1_)) |
| 199 return false; | 223 return false; |
| 200 } | 224 } |
| 201 | 225 |
| 202 // Optionally parse more address lines, which may have empty labels. | 226 // Optionally parse more address lines, which may have empty labels. |
| 203 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | 227 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
| 204 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | 228 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
| 205 if (is_ecml) { | 229 if (is_ecml) { |
| 206 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); | 230 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); |
| 207 if (!ParseEmptyText(scanner, &address_field->address2_)) | 231 if (!ParseEmptyText(scanner, &address_field->address2_)) |
| 208 ParseText(scanner, pattern, &address_field->address2_); | 232 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 233 &address_field->address2_); |
| 209 } else { | 234 } else { |
| 210 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); | 235 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); |
| 211 string16 label_pattern = | 236 string16 label_pattern = |
| 212 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | 237 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| 213 if (!ParseEmptyText(scanner, &address_field->address2_) && | 238 if (!ParseEmptyText(scanner, &address_field->address2_) && |
| 214 !ParseText(scanner, pattern, &address_field->address2_)) | 239 !ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 215 ParseLabelText(scanner, label_pattern, &address_field->address2_); | 240 &address_field->address2_)) |
| 241 ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 242 &address_field->address2_); |
| 216 } | 243 } |
| 217 | 244 |
| 218 // Try for a third line, which we will promptly discard. | 245 // Try for a third line, which we will promptly discard. |
| 219 if (address_field->address2_ != NULL) { | 246 if (address_field->address2_ != NULL) { |
| 220 if (is_ecml) { | 247 if (is_ecml) { |
| 221 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); | 248 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); |
| 222 ParseText(scanner, pattern); | 249 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT); |
| 223 } else { | 250 } else { |
| 224 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); | 251 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); |
| 225 if (!ParseEmptyText(scanner, NULL)) | 252 if (!ParseEmptyText(scanner, NULL)) |
| 226 ParseText(scanner, pattern, NULL); | 253 ParseText(scanner, pattern, |
| 254 MATCH_NAME | MATCH_LABEL | MATCH_TEXT, NULL); |
| 227 } | 255 } |
| 228 } | 256 } |
| 229 | 257 |
| 230 return true; | 258 return true; |
| 231 } | 259 } |
| 232 | 260 |
| 233 // static | 261 // static |
| 234 bool AddressField::ParseCountry(AutofillScanner* scanner, | 262 bool AddressField::ParseCountry(AutofillScanner* scanner, |
| 235 bool is_ecml, | 263 bool is_ecml, |
| 236 AddressField* address_field) { | 264 AddressField* address_field) { |
| 237 // Parse a country. The occasional page (e.g. | 265 // Parse a country. The occasional page (e.g. |
| 238 // Travelocity_New Member Information1.html) calls this a "location". | 266 // Travelocity_New Member Information1.html) calls this a "location". |
| 239 // Note: ECML standard uses 2 letter country code (ISO 3166) | 267 // Note: ECML standard uses 2 letter country code (ISO 3166) |
| 240 if (address_field->country_ && !address_field->country_->IsEmpty()) | 268 if (address_field->country_ && !address_field->country_->IsEmpty()) |
| 241 return false; | 269 return false; |
| 242 | 270 |
| 243 string16 pattern; | 271 string16 pattern; |
| 244 if (is_ecml) | 272 if (is_ecml) |
| 245 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); | 273 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); |
| 246 else | 274 else |
| 247 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); | 275 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); |
| 248 | 276 |
| 249 return ParseText(scanner, pattern, &address_field->country_); | 277 return ParseText(scanner, pattern, |
| 278 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 279 &address_field->country_); |
| 250 } | 280 } |
| 251 | 281 |
| 252 // static | 282 // static |
| 253 bool AddressField::ParseZipCode(AutofillScanner* scanner, | 283 bool AddressField::ParseZipCode(AutofillScanner* scanner, |
| 254 bool is_ecml, | 284 bool is_ecml, |
| 255 AddressField* address_field) { | 285 AddressField* address_field) { |
| 256 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | 286 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
| 257 // is called a "post code". | 287 // is called a "post code". |
| 258 // | 288 // |
| 259 // HACK: Just for the MapQuest driving directions page we match the | 289 // HACK: Just for the MapQuest driving directions page we match the |
| (...skipping 18 matching lines...) Expand all Loading... |
| 278 // more detail. | 308 // more detail. |
| 279 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); | 309 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); |
| 280 if (StartsWith(name, bill_to_postal_code_field, false)) { | 310 if (StartsWith(name, bill_to_postal_code_field, false)) { |
| 281 tempType = kBillingAddress; | 311 tempType = kBillingAddress; |
| 282 } else if (StartsWith(name, bill_to_postal_code_field, false)) { | 312 } else if (StartsWith(name, bill_to_postal_code_field, false)) { |
| 283 tempType = kShippingAddress; | 313 tempType = kShippingAddress; |
| 284 } else { | 314 } else { |
| 285 tempType = kGenericAddress; | 315 tempType = kGenericAddress; |
| 286 } | 316 } |
| 287 | 317 |
| 288 if (!ParseText(scanner, pattern, &address_field->zip_)) | 318 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 319 &address_field->zip_)) |
| 289 return false; | 320 return false; |
| 290 | 321 |
| 291 address_field->type_ = tempType; | 322 address_field->type_ = tempType; |
| 292 if (!is_ecml) { | 323 if (!is_ecml) { |
| 293 // Look for a zip+4, whose field name will also often contain | 324 // Look for a zip+4, whose field name will also often contain |
| 294 // the substring "zip". | 325 // the substring "zip". |
| 295 ParseText(scanner, | 326 ParseText(scanner, |
| 296 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), | 327 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), |
| 328 MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 297 &address_field->zip4_); | 329 &address_field->zip4_); |
| 298 } | 330 } |
| 299 | 331 |
| 300 return true; | 332 return true; |
| 301 } | 333 } |
| 302 | 334 |
| 303 // static | 335 // static |
| 304 bool AddressField::ParseCity(AutofillScanner* scanner, | 336 bool AddressField::ParseCity(AutofillScanner* scanner, |
| 305 bool is_ecml, | 337 bool is_ecml, |
| 306 AddressField* address_field) { | 338 AddressField* address_field) { |
| 307 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | 339 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
| 308 // the term "town". | 340 // the term "town". |
| 309 if (address_field->city_) | 341 if (address_field->city_) |
| 310 return false; | 342 return false; |
| 311 | 343 |
| 312 string16 pattern; | 344 string16 pattern; |
| 313 if (is_ecml) | 345 if (is_ecml) |
| 314 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); | 346 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); |
| 315 else | 347 else |
| 316 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); | 348 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); |
| 317 | 349 |
| 318 return ParseText(scanner, pattern, &address_field->city_); | 350 return ParseText(scanner, pattern, |
| 351 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 352 &address_field->city_); |
| 319 } | 353 } |
| 320 | 354 |
| 321 // static | 355 // static |
| 322 bool AddressField::ParseState(AutofillScanner* scanner, | 356 bool AddressField::ParseState(AutofillScanner* scanner, |
| 323 bool is_ecml, | 357 bool is_ecml, |
| 324 AddressField* address_field) { | 358 AddressField* address_field) { |
| 325 if (address_field->state_) | 359 if (address_field->state_) |
| 326 return false; | 360 return false; |
| 327 | 361 |
| 328 string16 pattern; | 362 string16 pattern; |
| 329 if (is_ecml) | 363 if (is_ecml) |
| 330 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); | 364 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); |
| 331 else | 365 else |
| 332 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); | 366 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); |
| 333 | 367 |
| 334 return ParseText(scanner, pattern, &address_field->state_); | 368 return ParseText(scanner, pattern, |
| 369 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 370 &address_field->state_); |
| 335 } | 371 } |
| 336 | 372 |
| 337 AddressType AddressField::AddressTypeFromText(const string16 &text) { | 373 AddressType AddressField::AddressTypeFromText(const string16 &text) { |
| 338 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) | 374 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) |
| 339 != string16::npos || | 375 != string16::npos || |
| 340 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) | 376 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) |
| 341 != string16::npos) | 377 != string16::npos) |
| 342 // This text could be a checkbox label such as "same as my billing | 378 // This text could be a checkbox label such as "same as my billing |
| 343 // address" or "use my shipping address". | 379 // address" or "use my shipping address". |
| 344 // ++ It would help if we generally skipped all text that appears | 380 // ++ It would help if we generally skipped all text that appears |
| (...skipping 15 matching lines...) Expand all Loading... |
| 360 return kBillingAddress; | 396 return kBillingAddress; |
| 361 | 397 |
| 362 if (bill == string16::npos && ship != string16::npos) | 398 if (bill == string16::npos && ship != string16::npos) |
| 363 return kShippingAddress; | 399 return kShippingAddress; |
| 364 | 400 |
| 365 if (bill > ship) | 401 if (bill > ship) |
| 366 return kBillingAddress; | 402 return kBillingAddress; |
| 367 | 403 |
| 368 return kShippingAddress; | 404 return kShippingAddress; |
| 369 } | 405 } |
| OLD | NEW |