| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/autofill/address_field.h" | |
| 6 | |
| 7 #include <stddef.h> | |
| 8 | |
| 9 #include "base/logging.h" | |
| 10 #include "base/memory/scoped_ptr.h" | |
| 11 #include "base/string16.h" | |
| 12 #include "base/string_util.h" | |
| 13 #include "base/utf_string_conversions.h" | |
| 14 #include "chrome/browser/autofill/autofill_field.h" | |
| 15 #include "chrome/browser/autofill/autofill_regex_constants.h" | |
| 16 #include "chrome/browser/autofill/autofill_scanner.h" | |
| 17 #include "chrome/browser/autofill/field_types.h" | |
| 18 #include "ui/base/l10n/l10n_util.h" | |
| 19 | |
| 20 FormField* AddressField::Parse(AutofillScanner* scanner) { | |
| 21 if (scanner->IsEnd()) | |
| 22 return NULL; | |
| 23 | |
| 24 scoped_ptr<AddressField> address_field(new AddressField); | |
| 25 const AutofillField* const initial_field = scanner->Cursor(); | |
| 26 size_t saved_cursor = scanner->SaveCursor(); | |
| 27 | |
| 28 string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); | |
| 29 string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); | |
| 30 | |
| 31 // Allow address fields to appear in any order. | |
| 32 size_t begin_trailing_non_labeled_fields = 0; | |
| 33 bool has_trailing_non_labeled_fields = false; | |
| 34 while (!scanner->IsEnd()) { | |
| 35 const size_t cursor = scanner->SaveCursor(); | |
| 36 if (ParseAddressLines(scanner, address_field.get()) || | |
| 37 ParseCity(scanner, address_field.get()) || | |
| 38 ParseState(scanner, address_field.get()) || | |
| 39 ParseZipCode(scanner, address_field.get()) || | |
| 40 ParseCountry(scanner, address_field.get()) || | |
| 41 ParseCompany(scanner, address_field.get())) { | |
| 42 has_trailing_non_labeled_fields = false; | |
| 43 continue; | |
| 44 } else if (ParseField(scanner, attention_ignored, NULL) || | |
| 45 ParseField(scanner, region_ignored, NULL)) { | |
| 46 // We ignore the following: | |
| 47 // * Attention. | |
| 48 // * Province/Region/Other. | |
| 49 continue; | |
| 50 } else if (scanner->Cursor() != initial_field && | |
| 51 ParseEmptyLabel(scanner, NULL)) { | |
| 52 // Ignore non-labeled fields within an address; the page | |
| 53 // MapQuest Driving Directions North America.html contains such a field. | |
| 54 // We only ignore such fields after we've parsed at least one other field; | |
| 55 // otherwise we'd effectively parse address fields before other field | |
| 56 // types after any non-labeled fields, and we want email address fields to | |
| 57 // have precedence since some pages contain fields labeled | |
| 58 // "Email address". | |
| 59 if (!has_trailing_non_labeled_fields) { | |
| 60 has_trailing_non_labeled_fields = true; | |
| 61 begin_trailing_non_labeled_fields = cursor; | |
| 62 } | |
| 63 | |
| 64 continue; | |
| 65 } else { | |
| 66 // No field found. | |
| 67 break; | |
| 68 } | |
| 69 } | |
| 70 | |
| 71 // If we have identified any address fields in this field then it should be | |
| 72 // added to the list of fields. | |
| 73 if (address_field->company_ != NULL || | |
| 74 address_field->address1_ != NULL || address_field->address2_ != NULL || | |
| 75 address_field->city_ != NULL || address_field->state_ != NULL || | |
| 76 address_field->zip_ != NULL || address_field->zip4_ || | |
| 77 address_field->country_ != NULL) { | |
| 78 // Don't slurp non-labeled fields at the end into the address. | |
| 79 if (has_trailing_non_labeled_fields) | |
| 80 scanner->RewindTo(begin_trailing_non_labeled_fields); | |
| 81 | |
| 82 address_field->type_ = address_field->FindType(); | |
| 83 return address_field.release(); | |
| 84 } | |
| 85 | |
| 86 scanner->RewindTo(saved_cursor); | |
| 87 return NULL; | |
| 88 } | |
| 89 | |
| 90 AddressField::AddressType AddressField::FindType() const { | |
| 91 // First look at the field name, which itself will sometimes contain | |
| 92 // "bill" or "ship". | |
| 93 if (company_) { | |
| 94 string16 name = StringToLowerASCII(company_->name); | |
| 95 return AddressTypeFromText(name); | |
| 96 } | |
| 97 if (address1_) { | |
| 98 string16 name = StringToLowerASCII(address1_->name); | |
| 99 return AddressTypeFromText(name); | |
| 100 } | |
| 101 if (address2_) { | |
| 102 string16 name = StringToLowerASCII(address2_->name); | |
| 103 return AddressTypeFromText(name); | |
| 104 } | |
| 105 if (city_) { | |
| 106 string16 name = StringToLowerASCII(city_->name); | |
| 107 return AddressTypeFromText(name); | |
| 108 } | |
| 109 if (zip_) { | |
| 110 string16 name = StringToLowerASCII(zip_->name); | |
| 111 return AddressTypeFromText(name); | |
| 112 } | |
| 113 if (state_) { | |
| 114 string16 name = StringToLowerASCII(state_->name); | |
| 115 return AddressTypeFromText(name); | |
| 116 } | |
| 117 if (country_) { | |
| 118 string16 name = StringToLowerASCII(country_->name); | |
| 119 return AddressTypeFromText(name); | |
| 120 } | |
| 121 | |
| 122 return kGenericAddress; | |
| 123 } | |
| 124 | |
| 125 AddressField::AddressField() | |
| 126 : company_(NULL), | |
| 127 address1_(NULL), | |
| 128 address2_(NULL), | |
| 129 city_(NULL), | |
| 130 state_(NULL), | |
| 131 zip_(NULL), | |
| 132 zip4_(NULL), | |
| 133 country_(NULL), | |
| 134 type_(kGenericAddress) { | |
| 135 } | |
| 136 | |
| 137 bool AddressField::ClassifyField(FieldTypeMap* map) const { | |
| 138 AutofillFieldType address_company; | |
| 139 AutofillFieldType address_line1; | |
| 140 AutofillFieldType address_line2; | |
| 141 AutofillFieldType address_city; | |
| 142 AutofillFieldType address_state; | |
| 143 AutofillFieldType address_zip; | |
| 144 AutofillFieldType address_country; | |
| 145 | |
| 146 switch (type_) { | |
| 147 case kShippingAddress: | |
| 148 // Fall through. Autofill does not support shipping addresses. | |
| 149 case kGenericAddress: | |
| 150 address_company = COMPANY_NAME; | |
| 151 address_line1 = ADDRESS_HOME_LINE1; | |
| 152 address_line2 = ADDRESS_HOME_LINE2; | |
| 153 address_city = ADDRESS_HOME_CITY; | |
| 154 address_state = ADDRESS_HOME_STATE; | |
| 155 address_zip = ADDRESS_HOME_ZIP; | |
| 156 address_country = ADDRESS_HOME_COUNTRY; | |
| 157 break; | |
| 158 | |
| 159 case kBillingAddress: | |
| 160 address_company = COMPANY_NAME; | |
| 161 address_line1 = ADDRESS_BILLING_LINE1; | |
| 162 address_line2 = ADDRESS_BILLING_LINE2; | |
| 163 address_city = ADDRESS_BILLING_CITY; | |
| 164 address_state = ADDRESS_BILLING_STATE; | |
| 165 address_zip = ADDRESS_BILLING_ZIP; | |
| 166 address_country = ADDRESS_BILLING_COUNTRY; | |
| 167 break; | |
| 168 | |
| 169 default: | |
| 170 NOTREACHED(); | |
| 171 return false; | |
| 172 } | |
| 173 | |
| 174 bool ok = AddClassification(company_, address_company, map); | |
| 175 ok = ok && AddClassification(address1_, address_line1, map); | |
| 176 ok = ok && AddClassification(address2_, address_line2, map); | |
| 177 ok = ok && AddClassification(city_, address_city, map); | |
| 178 ok = ok && AddClassification(state_, address_state, map); | |
| 179 ok = ok && AddClassification(zip_, address_zip, map); | |
| 180 ok = ok && AddClassification(country_, address_country, map); | |
| 181 return ok; | |
| 182 } | |
| 183 | |
| 184 // static | |
| 185 bool AddressField::ParseCompany(AutofillScanner* scanner, | |
| 186 AddressField* address_field) { | |
| 187 if (address_field->company_ && !address_field->company_->IsEmpty()) | |
| 188 return false; | |
| 189 | |
| 190 return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), | |
| 191 &address_field->company_); | |
| 192 } | |
| 193 | |
| 194 // static | |
| 195 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | |
| 196 AddressField* address_field) { | |
| 197 // We only match the string "address" in page text, not in element names, | |
| 198 // because sometimes every element in a group of address fields will have | |
| 199 // a name containing the string "address"; for example, on the page | |
| 200 // Kohl's - Register Billing Address.html the text element labeled "city" | |
| 201 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | |
| 202 // such as "address1", which appear as element names on various pages (eg | |
| 203 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | |
| 204 // EBay Registration Enter Information.html). | |
| 205 if (address_field->address1_) | |
| 206 return false; | |
| 207 | |
| 208 string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); | |
| 209 string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); | |
| 210 | |
| 211 if (!ParseField(scanner, pattern, &address_field->address1_) && | |
| 212 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
| 213 &address_field->address1_)) { | |
| 214 return false; | |
| 215 } | |
| 216 | |
| 217 // Optionally parse more address lines, which may have empty labels. | |
| 218 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | |
| 219 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | |
| 220 pattern = UTF8ToUTF16(autofill::kAddressLine2Re); | |
| 221 label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); | |
| 222 if (!ParseEmptyLabel(scanner, &address_field->address2_) && | |
| 223 !ParseField(scanner, pattern, &address_field->address2_)) { | |
| 224 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
| 225 &address_field->address2_); | |
| 226 } | |
| 227 | |
| 228 // Try for a third line, which we will promptly discard. | |
| 229 if (address_field->address2_ != NULL) { | |
| 230 pattern = UTF8ToUTF16(autofill::kAddressLine3Re); | |
| 231 ParseField(scanner, pattern, NULL); | |
| 232 } | |
| 233 | |
| 234 return true; | |
| 235 } | |
| 236 | |
| 237 // static | |
| 238 bool AddressField::ParseCountry(AutofillScanner* scanner, | |
| 239 AddressField* address_field) { | |
| 240 // Parse a country. The occasional page (e.g. | |
| 241 // Travelocity_New Member Information1.html) calls this a "location". | |
| 242 if (address_field->country_ && !address_field->country_->IsEmpty()) | |
| 243 return false; | |
| 244 | |
| 245 return ParseFieldSpecifics(scanner, | |
| 246 UTF8ToUTF16(autofill::kCountryRe), | |
| 247 MATCH_DEFAULT | MATCH_SELECT, | |
| 248 &address_field->country_); | |
| 249 } | |
| 250 | |
| 251 // static | |
| 252 bool AddressField::ParseZipCode(AutofillScanner* scanner, | |
| 253 AddressField* address_field) { | |
| 254 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
| 255 // is called a "post code". | |
| 256 // | |
| 257 // HACK: Just for the MapQuest driving directions page we match the | |
| 258 // exact name "1z", which MapQuest uses to label its zip code field. | |
| 259 // Hopefully before long we'll be smart enough to find the zip code | |
| 260 // on that page automatically. | |
| 261 if (address_field->zip_) | |
| 262 return false; | |
| 263 | |
| 264 string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); | |
| 265 if (!ParseField(scanner, pattern, &address_field->zip_)) | |
| 266 return false; | |
| 267 | |
| 268 address_field->type_ = kGenericAddress; | |
| 269 // Look for a zip+4, whose field name will also often contain | |
| 270 // the substring "zip". | |
| 271 ParseField(scanner, | |
| 272 UTF8ToUTF16(autofill::kZip4Re), | |
| 273 &address_field->zip4_); | |
| 274 | |
| 275 return true; | |
| 276 } | |
| 277 | |
| 278 // static | |
| 279 bool AddressField::ParseCity(AutofillScanner* scanner, | |
| 280 AddressField* address_field) { | |
| 281 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
| 282 // the term "town". | |
| 283 if (address_field->city_) | |
| 284 return false; | |
| 285 | |
| 286 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
| 287 return ParseFieldSpecifics(scanner, | |
| 288 UTF8ToUTF16(autofill::kCityRe), | |
| 289 MATCH_DEFAULT | MATCH_SELECT, | |
| 290 &address_field->city_); | |
| 291 } | |
| 292 | |
| 293 // static | |
| 294 bool AddressField::ParseState(AutofillScanner* scanner, | |
| 295 AddressField* address_field) { | |
| 296 if (address_field->state_) | |
| 297 return false; | |
| 298 | |
| 299 return ParseFieldSpecifics(scanner, | |
| 300 UTF8ToUTF16(autofill::kStateRe), | |
| 301 MATCH_DEFAULT | MATCH_SELECT, | |
| 302 &address_field->state_); | |
| 303 } | |
| 304 | |
| 305 AddressField::AddressType AddressField::AddressTypeFromText( | |
| 306 const string16 &text) { | |
| 307 size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)); | |
| 308 size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)); | |
| 309 if (same_as != string16::npos || use_shipping != string16::npos) | |
| 310 // This text could be a checkbox label such as "same as my billing | |
| 311 // address" or "use my shipping address". | |
| 312 // ++ It would help if we generally skipped all text that appears | |
| 313 // after a check box. | |
| 314 return kGenericAddress; | |
| 315 | |
| 316 // Not all pages say "billing address" and "shipping address" explicitly; | |
| 317 // for example, Craft Catalog1.html has "Bill-to Address" and | |
| 318 // "Ship-to Address". | |
| 319 size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe)); | |
| 320 size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe)); | |
| 321 | |
| 322 if (bill == string16::npos && ship == string16::npos) | |
| 323 return kGenericAddress; | |
| 324 | |
| 325 if (bill != string16::npos && ship == string16::npos) | |
| 326 return kBillingAddress; | |
| 327 | |
| 328 if (bill == string16::npos && ship != string16::npos) | |
| 329 return kShippingAddress; | |
| 330 | |
| 331 if (bill > ship) | |
| 332 return kBillingAddress; | |
| 333 | |
| 334 return kShippingAddress; | |
| 335 } | |
| OLD | NEW |