| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autofill/address_field.h" | 5 #include "chrome/browser/autofill/address_field.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| (...skipping 67 matching lines...) | (...skipping 67 matching lines...) |
| 78 | 78 |
| 79 // Allow address fields to appear in any order. | 79 // Allow address fields to appear in any order. |
| 80 while (!scanner->IsEnd()) { | 80 while (!scanner->IsEnd()) { |
| 81 if (ParseCompany(scanner, is_ecml, address_field.get()) || | 81 if (ParseCompany(scanner, is_ecml, address_field.get()) || |
| 82 ParseAddressLines(scanner, is_ecml, address_field.get()) || | 82 ParseAddressLines(scanner, is_ecml, address_field.get()) || |
| 83 ParseCity(scanner, is_ecml, address_field.get()) || | 83 ParseCity(scanner, is_ecml, address_field.get()) || |
| 84 ParseState(scanner, is_ecml, address_field.get()) || | 84 ParseState(scanner, is_ecml, address_field.get()) || |
| 85 ParseZipCode(scanner, is_ecml, address_field.get()) || | 85 ParseZipCode(scanner, is_ecml, address_field.get()) || |
| 86 ParseCountry(scanner, is_ecml, address_field.get())) { | 86 ParseCountry(scanner, is_ecml, address_field.get())) { |
| 87 continue; | 87 continue; |
| 88 } else if (ParseText(scanner, attention_ignored) || | 88 } else if (ParseText(scanner, attention_ignored, |
| 89 ParseText(scanner, region_ignored)) { | 89 MATCH_NAME | MATCH_LABEL | MATCH_TEXT) || |
| 90 ParseText(scanner, region_ignored, |
| 91 MATCH_NAME | MATCH_LABEL | MATCH_TEXT)) { |
| 90 // We ignore the following: | 92 // We ignore the following: |
| 91 // * Attention. | 93 // * Attention. |
| 92 // * Province/Region/Other. | 94 // * Province/Region/Other. |
| 93 continue; | 95 continue; |
| 94 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) { | 96 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) { |
| 95 // Ignore non-labeled fields within an address; the page | 97 // Ignore non-labeled fields within an address; the page |
| 96 // MapQuest Driving Directions North America.html contains such a field. | 98 // MapQuest Driving Directions North America.html contains such a field. |
| 97 // We only ignore such fields after we've parsed at least one other field; | 99 // We only ignore such fields after we've parsed at least one other field; |
| 98 // otherwise we'd effectively parse address fields before other field | 100 // otherwise we'd effectively parse address fields before other field |
| 99 // types after any non-labeled fields, and we want email address fields to | 101 // types after any non-labeled fields, and we want email address fields to |
| (...skipping 59 matching lines...) | (...skipping 59 matching lines...) |
| 159 return false; | 161 return false; |
| 160 | 162 |
| 161 string16 pattern; | 163 string16 pattern; |
| 162 if (is_ecml) { | 164 if (is_ecml) { |
| 163 pattern = GetEcmlPattern(kEcmlShipToCompanyName, | 165 pattern = GetEcmlPattern(kEcmlShipToCompanyName, |
| 164 kEcmlBillToCompanyName, '|'); | 166 kEcmlBillToCompanyName, '|'); |
| 165 } else { | 167 } else { |
| 166 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); | 168 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); |
| 167 } | 169 } |
| 168 | 170 |
| 169 return ParseText(scanner, pattern, &address_field->company_); | 171 return ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 172 &address_field->company_); |
| 170 } | 173 } |
| 171 | 174 |
| 172 // static | 175 // static |
| 173 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | 176 bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
| 174 bool is_ecml, | 177 bool is_ecml, |
| 175 AddressField* address_field) { | 178 AddressField* address_field) { |
| 176 // We only match the string "address" in page text, not in element names, | 179 // We only match the string "address" in page text, not in element names, |
| 177 // because sometimes every element in a group of address fields will have | 180 // because sometimes every element in a group of address fields will have |
| 178 // a name containing the string "address"; for example, on the page | 181 // a name containing the string "address"; for example, on the page |
| 179 // Kohl's - Register Billing Address.html the text element labeled "city" | 182 // Kohl's - Register Billing Address.html the text element labeled "city" |
| 180 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | 183 // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
| 181 // such as "address1", which appear as element names on various pages (eg | 184 // such as "address1", which appear as element names on various pages (eg |
| 182 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | 185 // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
| 183 // EBay Registration Enter Information.html). | 186 // EBay Registration Enter Information.html). |
| 184 if (address_field->address1_) | 187 if (address_field->address1_) |
| 185 return false; | 188 return false; |
| 186 | 189 |
| 187 string16 pattern; | 190 string16 pattern; |
| 188 if (is_ecml) { | 191 if (is_ecml) { |
| 189 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); | 192 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); |
| 190 if (!ParseText(scanner, pattern, &address_field->address1_)) | 193 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 194 &address_field->address1_)) |
| 191 return false; | 195 return false; |
| 192 } else { | 196 } else { |
| 193 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); | 197 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); |
| 194 string16 label_pattern = | 198 string16 label_pattern = |
| 195 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | 199 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| 196 | 200 |
| 197 if (!ParseText(scanner, pattern, &address_field->address1_) && | 201 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 198 !ParseLabelText(scanner, label_pattern, &address_field->address1_)) | 202 &address_field->address1_) && |
| 203 !ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 204 &address_field->address1_)) |
| 199 return false; | 205 return false; |
| 200 } | 206 } |
| 201 | 207 |
| 202 // Optionally parse more address lines, which may have empty labels. | 208 // Optionally parse more address lines, which may have empty labels. |
| 203 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | 209 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
| 204 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | 210 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
| 205 if (is_ecml) { | 211 if (is_ecml) { |
| 206 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); | 212 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); |
| 207 if (!ParseEmptyText(scanner, &address_field->address2_)) | 213 if (!ParseEmptyText(scanner, &address_field->address2_)) |
| 208 ParseText(scanner, pattern, &address_field->address2_); | 214 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 215 &address_field->address2_); |
| 209 } else { | 216 } else { |
| 210 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); | 217 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); |
| 211 string16 label_pattern = | 218 string16 label_pattern = |
| 212 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | 219 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| 213 if (!ParseEmptyText(scanner, &address_field->address2_) && | 220 if (!ParseEmptyText(scanner, &address_field->address2_) && |
| 214 !ParseText(scanner, pattern, &address_field->address2_)) | 221 !ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 215 ParseLabelText(scanner, label_pattern, &address_field->address2_); | 222 &address_field->address2_)) |
| 223 ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 224 &address_field->address2_); |
| 216 } | 225 } |
| 217 | 226 |
| 218 // Try for a third line, which we will promptly discard. | 227 // Try for a third line, which we will promptly discard. |
| 219 if (address_field->address2_ != NULL) { | 228 if (address_field->address2_ != NULL) { |
| 220 if (is_ecml) { | 229 if (is_ecml) { |
| 221 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); | 230 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); |
| 222 ParseText(scanner, pattern); | 231 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT); |
| 223 } else { | 232 } else { |
| 224 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); | 233 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); |
| 225 if (!ParseEmptyText(scanner, NULL)) | 234 if (!ParseEmptyText(scanner, NULL)) |
| 226 ParseText(scanner, pattern, NULL); | 235 ParseText(scanner, pattern, |
| 236 MATCH_NAME | MATCH_LABEL | MATCH_TEXT, NULL); |
| 227 } | 237 } |
| 228 } | 238 } |
| 229 | 239 |
| 230 return true; | 240 return true; |
| 231 } | 241 } |
| 232 | 242 |
| 233 // static | 243 // static |
| 234 bool AddressField::ParseCountry(AutofillScanner* scanner, | 244 bool AddressField::ParseCountry(AutofillScanner* scanner, |
| 235 bool is_ecml, | 245 bool is_ecml, |
| 236 AddressField* address_field) { | 246 AddressField* address_field) { |
| 237 // Parse a country. The occasional page (e.g. | 247 // Parse a country. The occasional page (e.g. |
| 238 // Travelocity_New Member Information1.html) calls this a "location". | 248 // Travelocity_New Member Information1.html) calls this a "location". |
| 239 // Note: ECML standard uses 2 letter country code (ISO 3166) | 249 // Note: ECML standard uses 2 letter country code (ISO 3166) |
| 240 if (address_field->country_ && !address_field->country_->IsEmpty()) | 250 if (address_field->country_ && !address_field->country_->IsEmpty()) |
| 241 return false; | 251 return false; |
| 242 | 252 |
| 243 string16 pattern; | 253 string16 pattern; |
| 244 if (is_ecml) | 254 if (is_ecml) |
| 245 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); | 255 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); |
| 246 else | 256 else |
| 247 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); | 257 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); |
| 248 | 258 |
| 249 return ParseText(scanner, pattern, &address_field->country_); | 259 return ParseText(scanner, pattern, |
| 260 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 261 &address_field->country_); |
| 250 } | 262 } |
| 251 | 263 |
| 252 // static | 264 // static |
| 253 bool AddressField::ParseZipCode(AutofillScanner* scanner, | 265 bool AddressField::ParseZipCode(AutofillScanner* scanner, |
| 254 bool is_ecml, | 266 bool is_ecml, |
| 255 AddressField* address_field) { | 267 AddressField* address_field) { |
| 256 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | 268 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
| 257 // is called a "post code". | 269 // is called a "post code". |
| 258 // | 270 // |
| 259 // HACK: Just for the MapQuest driving directions page we match the | 271 // HACK: Just for the MapQuest driving directions page we match the |
| (...skipping 18 matching lines...) | (...skipping 18 matching lines...) |
| 278 // more detail. | 290 // more detail. |
| 279 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); | 291 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); |
| 280 if (StartsWith(name, bill_to_postal_code_field, false)) { | 292 if (StartsWith(name, bill_to_postal_code_field, false)) { |
| 281 tempType = kBillingAddress; | 293 tempType = kBillingAddress; |
| 282 } else if (StartsWith(name, bill_to_postal_code_field, false)) { | 294 } else if (StartsWith(name, bill_to_postal_code_field, false)) { |
| 283 tempType = kShippingAddress; | 295 tempType = kShippingAddress; |
| 284 } else { | 296 } else { |
| 285 tempType = kGenericAddress; | 297 tempType = kGenericAddress; |
| 286 } | 298 } |
| 287 | 299 |
| 288 if (!ParseText(scanner, pattern, &address_field->zip_)) | 300 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 301 &address_field->zip_)) |
| 289 return false; | 302 return false; |
| 290 | 303 |
| 291 address_field->type_ = tempType; | 304 address_field->type_ = tempType; |
| 292 if (!is_ecml) { | 305 if (!is_ecml) { |
| 293 // Look for a zip+4, whose field name will also often contain | 306 // Look for a zip+4, whose field name will also often contain |
| 294 // the substring "zip". | 307 // the substring "zip". |
| 295 ParseText(scanner, | 308 ParseText(scanner, |
| 296 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), | 309 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), |
| 310 MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 297 &address_field->zip4_); | 311 &address_field->zip4_); |
| 298 } | 312 } |
| 299 | 313 |
| 300 return true; | 314 return true; |
| 301 } | 315 } |
| 302 | 316 |
| 303 // static | 317 // static |
| 304 bool AddressField::ParseCity(AutofillScanner* scanner, | 318 bool AddressField::ParseCity(AutofillScanner* scanner, |
| 305 bool is_ecml, | 319 bool is_ecml, |
| 306 AddressField* address_field) { | 320 AddressField* address_field) { |
| 307 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | 321 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
| 308 // the term "town". | 322 // the term "town". |
| 309 if (address_field->city_) | 323 if (address_field->city_) |
| 310 return false; | 324 return false; |
| 311 | 325 |
| 312 string16 pattern; | 326 string16 pattern; |
| 313 if (is_ecml) | 327 if (is_ecml) |
| 314 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); | 328 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); |
| 315 else | 329 else |
| 316 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); | 330 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); |
| 317 | 331 |
| 318 return ParseText(scanner, pattern, &address_field->city_); | 332 return ParseText(scanner, pattern, |
| 333 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 334 &address_field->city_); |
| 319 } | 335 } |
| 320 | 336 |
| 321 // static | 337 // static |
| 322 bool AddressField::ParseState(AutofillScanner* scanner, | 338 bool AddressField::ParseState(AutofillScanner* scanner, |
| 323 bool is_ecml, | 339 bool is_ecml, |
| 324 AddressField* address_field) { | 340 AddressField* address_field) { |
| 325 if (address_field->state_) | 341 if (address_field->state_) |
| 326 return false; | 342 return false; |
| 327 | 343 |
| 328 string16 pattern; | 344 string16 pattern; |
| 329 if (is_ecml) | 345 if (is_ecml) |
| 330 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); | 346 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); |
| 331 else | 347 else |
| 332 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); | 348 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); |
| 333 | 349 |
| 334 return ParseText(scanner, pattern, &address_field->state_); | 350 return ParseText(scanner, pattern, |
| 351 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 352 &address_field->state_); |
| 335 } | 353 } |
| 336 | 354 |
| 337 AddressType AddressField::AddressTypeFromText(const string16 &text) { | 355 AddressType AddressField::AddressTypeFromText(const string16 &text) { |
| 338 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) | 356 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) |
| 339 != string16::npos || | 357 != string16::npos || |
| 340 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) | 358 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) |
| 341 != string16::npos) | 359 != string16::npos) |
| 342 // This text could be a checkbox label such as "same as my billing | 360 // This text could be a checkbox label such as "same as my billing |
| 343 // address" or "use my shipping address". | 361 // address" or "use my shipping address". |
| 344 // ++ It would help if we generally skipped all text that appears | 362 // ++ It would help if we generally skipped all text that appears |
| (...skipping 15 matching lines...) | (...skipping 15 matching lines...) |
| 360 return kBillingAddress; | 378 return kBillingAddress; |
| 361 | 379 |
| 362 if (bill == string16::npos && ship != string16::npos) | 380 if (bill == string16::npos && ship != string16::npos) |
| 363 return kShippingAddress; | 381 return kShippingAddress; |
| 364 | 382 |
| 365 if (bill > ship) | 383 if (bill > ship) |
| 366 return kBillingAddress; | 384 return kBillingAddress; |
| 367 | 385 |
| 368 return kShippingAddress; | 386 return kShippingAddress; |
| 369 } | 387 } |
| OLD | NEW |