OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autofill/address_field.h" | 5 #include "chrome/browser/autofill/address_field.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
78 | 78 |
79 // Allow address fields to appear in any order. | 79 // Allow address fields to appear in any order. |
80 while (!scanner->IsEnd()) { | 80 while (!scanner->IsEnd()) { |
81 if (ParseCompany(scanner, is_ecml, address_field.get()) || | 81 if (ParseCompany(scanner, is_ecml, address_field.get()) || |
82 ParseAddressLines(scanner, is_ecml, address_field.get()) || | 82 ParseAddressLines(scanner, is_ecml, address_field.get()) || |
83 ParseCity(scanner, is_ecml, address_field.get()) || | 83 ParseCity(scanner, is_ecml, address_field.get()) || |
84 ParseState(scanner, is_ecml, address_field.get()) || | 84 ParseState(scanner, is_ecml, address_field.get()) || |
85 ParseZipCode(scanner, is_ecml, address_field.get()) || | 85 ParseZipCode(scanner, is_ecml, address_field.get()) || |
86 ParseCountry(scanner, is_ecml, address_field.get())) { | 86 ParseCountry(scanner, is_ecml, address_field.get())) { |
87 continue; | 87 continue; |
88 } else if (ParseText(scanner, attention_ignored) || | 88 } else if (ParseText(scanner, attention_ignored, |
89 ParseText(scanner, region_ignored)) { | 89 MATCH_NAME | MATCH_LABEL | MATCH_TEXT) || |
| 90 ParseText(scanner, region_ignored, |
| 91 MATCH_NAME | MATCH_LABEL | MATCH_TEXT)) { |
90 // We ignore the following: | 92 // We ignore the following: |
91 // * Attention. | 93 // * Attention. |
92 // * Province/Region/Other. | 94 // * Province/Region/Other. |
93 continue; | 95 continue; |
94 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) { | 96 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) { |
95 // Ignore non-labeled fields within an address; the page | 97 // Ignore non-labeled fields within an address; the page |
96 // MapQuest Driving Directions North America.html contains such a field. | 98 // MapQuest Driving Directions North America.html contains such a field. |
97 // We only ignore such fields after we've parsed at least one other field; | 99 // We only ignore such fields after we've parsed at least one other field; |
98 // otherwise we'd effectively parse address fields before other field | 100 // otherwise we'd effectively parse address fields before other field |
99 // types after any non-labeled fields, and we want email address fields to | 101 // types after any non-labeled fields, and we want email address fields to |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
159 return false; | 161 return false; |
160 | 162 |
161 string16 pattern; | 163 string16 pattern; |
162 if (is_ecml) { | 164 if (is_ecml) { |
163 pattern = GetEcmlPattern(kEcmlShipToCompanyName, | 165 pattern = GetEcmlPattern(kEcmlShipToCompanyName, |
164 kEcmlBillToCompanyName, '|'); | 166 kEcmlBillToCompanyName, '|'); |
165 } else { | 167 } else { |
166 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); | 168 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); |
167 } | 169 } |
168 | 170 |
169 return ParseText(scanner, pattern, &address_field->company_); | 171 return ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 172 &address_field->company_); |
170 } | 173 } |
171 | 174 |
// Review notes (added; the OLD | NEW diff rows below are unchanged):
// ParseAddressLines returns false if address_field->address1_ is already set
// or if no first-address-line pattern matches. Otherwise it returns true after
// optionally trying a second line (stored in address2_) and, only when a
// second line was found, a third line whose match is not stored anywhere.
// In NEW every ParseText call carries an explicit match mask, and the OLD
// ParseLabelText calls become ParseText with MATCH_LABEL | MATCH_TEXT, i.e.
// the same call without MATCH_NAME.
172 // static | 175 // static |
173 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | 176 bool AddressField::ParseAddressLines(AutofillScanner* scanner, |
174 bool is_ecml, | 177 bool is_ecml, |
175 AddressField* address_field) { | 178 AddressField* address_field) { |
176 // We only match the string "address" in page text, not in element names, | 179 // We only match the string "address" in page text, not in element names, |
177 // because sometimes every element in a group of address fields will have | 180 // because sometimes every element in a group of address fields will have |
178 // a name containing the string "address"; for example, on the page | 181 // a name containing the string "address"; for example, on the page |
179 // Kohl's - Register Billing Address.html the text element labeled "city" | 182 // Kohl's - Register Billing Address.html the text element labeled "city" |
180 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | 183 // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
181 // such as "address1", which appear as element names on various pages (eg | 184 // such as "address1", which appear as element names on various pages (eg |
182 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | 185 // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
183 // EBay Registration Enter Information.html). | 186 // EBay Registration Enter Information.html). |
184 if (address_field->address1_) | 187 if (address_field->address1_) |
185 return false; | 188 return false; |
186 | 189 |
187 string16 pattern; | 190 string16 pattern; |
188 if (is_ecml) { | 191 if (is_ecml) { |
// ECML forms: the first line is mandatory, and there is no label-only fallback.
189 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); | 192 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); |
190 if (!ParseText(scanner, pattern, &address_field->address1_)) | 193 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 194 &address_field->address1_)) |
191 return false; | 195 return false; |
192 } else { | 196 } else { |
// Non-ECML forms: try the general line-1 pattern first, then fall back to the
// label pattern, which in NEW is matched without MATCH_NAME.
193 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); | 197 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); |
194 string16 label_pattern = | 198 string16 label_pattern = |
195 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | 199 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
196 | 200 |
197 if (!ParseText(scanner, pattern, &address_field->address1_) && | 201 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
198 !ParseLabelText(scanner, label_pattern, &address_field->address1_)) | 202 &address_field->address1_) && |
| 203 !ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 204 &address_field->address1_)) |
199 return false; | 205 return false; |
200 } | 206 } |
201 | 207 |
202 // Optionally parse more address lines, which may have empty labels. | 208 // Optionally parse more address lines, which may have empty labels. |
203 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | 209 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
204 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | 210 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
// The return values of the second-line attempts below are deliberately not
// checked: a missing second line does not make the function fail.
205 if (is_ecml) { | 211 if (is_ecml) { |
206 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); | 212 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); |
207 if (!ParseEmptyText(scanner, &address_field->address2_)) | 213 if (!ParseEmptyText(scanner, &address_field->address2_)) |
208 ParseText(scanner, pattern, &address_field->address2_); | 214 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 215 &address_field->address2_); |
209 } else { | 216 } else { |
210 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); | 217 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); |
// NOTE(review): the line-2 fallback reuses IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE
// as its label pattern. Confirm this is intentional and not a copy-paste from
// the line-1 branch above.
211 string16 label_pattern = | 218 string16 label_pattern = |
212 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); | 219 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
213 if (!ParseEmptyText(scanner, &address_field->address2_) && | 220 if (!ParseEmptyText(scanner, &address_field->address2_) && |
214 !ParseText(scanner, pattern, &address_field->address2_)) | 221 !ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
215 ParseLabelText(scanner, label_pattern, &address_field->address2_); | 222 &address_field->address2_)) |
| 223 ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, |
| 224 &address_field->address2_); |
216 } | 225 } |
217 | 226 |
218 // Try for a third line, which we will promptly discard. | 227 // Try for a third line, which we will promptly discard. |
219 if (address_field->address2_ != NULL) { | 228 if (address_field->address2_ != NULL) { |
// NOTE(review): the ECML branch calls ParseText with no destination argument
// and never tries ParseEmptyText, while the non-ECML branch tries
// ParseEmptyText first and passes an explicit NULL destination. Presumably the
// two ParseText forms are equivalent overloads; confirm against their
// declarations.
220 if (is_ecml) { | 229 if (is_ecml) { |
221 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); | 230 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); |
222 ParseText(scanner, pattern); | 231 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT); |
223 } else { | 232 } else { |
224 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); | 233 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); |
225 if (!ParseEmptyText(scanner, NULL)) | 234 if (!ParseEmptyText(scanner, NULL)) |
226 ParseText(scanner, pattern, NULL); | 235 ParseText(scanner, pattern, |
| 236 MATCH_NAME | MATCH_LABEL | MATCH_TEXT, NULL); |
227 } | 237 } |
228 } | 238 } |
229 | 239 |
230 return true; | 240 return true; |
231 } | 241 } |
232 | 242 |
// Review notes (added; the OLD | NEW diff rows below are unchanged):
// ParseCountry returns false without attempting a match when country_ is
// already set and non-empty, so a country_ that is set but empty can still be
// replaced. Otherwise it returns the result of ParseText, which receives
// &address_field->country_ as its destination.
// NEW passes an explicit match mask that includes MATCH_SELECT, presumably so
// that <select> elements can match; confirm against the MATCH_* definitions.
233 // static | 243 // static |
234 bool AddressField::ParseCountry(AutofillScanner* scanner, | 244 bool AddressField::ParseCountry(AutofillScanner* scanner, |
235 bool is_ecml, | 245 bool is_ecml, |
236 AddressField* address_field) { | 246 AddressField* address_field) { |
237 // Parse a country. The occasional page (e.g. | 247 // Parse a country. The occasional page (e.g. |
238 // Travelocity_New Member Information1.html) calls this a "location". | 248 // Travelocity_New Member Information1.html) calls this a "location". |
239 // Note: ECML standard uses 2 letter country code (ISO 3166) | 249 // Note: ECML standard uses 2 letter country code (ISO 3166) |
240 if (address_field->country_ && !address_field->country_->IsEmpty()) | 250 if (address_field->country_ && !address_field->country_->IsEmpty()) |
241 return false; | 251 return false; |
242 | 252 |
243 string16 pattern; | 253 string16 pattern; |
// The pattern comes from the ECML ship-to/bill-to field names when is_ecml is
// set, and from the localized IDS_AUTOFILL_COUNTRY_RE resource otherwise.
244 if (is_ecml) | 254 if (is_ecml) |
245 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); | 255 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); |
246 else | 256 else |
247 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); | 257 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); |
248 | 258 |
249 return ParseText(scanner, pattern, &address_field->country_); | 259 return ParseText(scanner, pattern, |
| 260 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 261 &address_field->country_); |
250 } | 262 } |
251 | 263 |
252 // static | 264 // static |
253 bool AddressField::ParseZipCode(AutofillScanner* scanner, | 265 bool AddressField::ParseZipCode(AutofillScanner* scanner, |
254 bool is_ecml, | 266 bool is_ecml, |
255 AddressField* address_field) { | 267 AddressField* address_field) { |
256 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | 268 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
257 // is called a "post code". | 269 // is called a "post code". |
258 // | 270 // |
259 // HACK: Just for the MapQuest driving directions page we match the | 271 // HACK: Just for the MapQuest driving directions page we match the |
(...skipping 18 matching lines...) Expand all Loading... |
278 // more detail. | 290 // more detail. |
279 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); | 291 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); |
280 if (StartsWith(name, bill_to_postal_code_field, false)) { | 292 if (StartsWith(name, bill_to_postal_code_field, false)) { |
281 tempType = kBillingAddress; | 293 tempType = kBillingAddress; |
282 } else if (StartsWith(name, bill_to_postal_code_field, false)) { | 294 } else if (StartsWith(name, bill_to_postal_code_field, false)) { |
283 tempType = kShippingAddress; | 295 tempType = kShippingAddress; |
284 } else { | 296 } else { |
285 tempType = kGenericAddress; | 297 tempType = kGenericAddress; |
286 } | 298 } |
287 | 299 |
288 if (!ParseText(scanner, pattern, &address_field->zip_)) | 300 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
| 301 &address_field->zip_)) |
289 return false; | 302 return false; |
290 | 303 |
291 address_field->type_ = tempType; | 304 address_field->type_ = tempType; |
292 if (!is_ecml) { | 305 if (!is_ecml) { |
293 // Look for a zip+4, whose field name will also often contain | 306 // Look for a zip+4, whose field name will also often contain |
294 // the substring "zip". | 307 // the substring "zip". |
295 ParseText(scanner, | 308 ParseText(scanner, |
296 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), | 309 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), |
| 310 MATCH_NAME | MATCH_LABEL | MATCH_TEXT, |
297 &address_field->zip4_); | 311 &address_field->zip4_); |
298 } | 312 } |
299 | 313 |
300 return true; | 314 return true; |
301 } | 315 } |
302 | 316 |
// Review notes (added; the OLD | NEW diff rows below are unchanged):
// ParseCity returns false without attempting a match when city_ is already
// set. Otherwise it returns the result of ParseText, which receives
// &address_field->city_ as its destination.
// NOTE(review): the guard here tests only that city_ is non-null, whereas
// ParseCountry in this file also requires the existing field to be non-empty.
// Confirm the difference is intended.
// NOTE(review): NEW adds MATCH_SELECT to the mask for city; confirm that
// matching select elements is wanted for city as well as state and country.
303 // static | 317 // static |
304 bool AddressField::ParseCity(AutofillScanner* scanner, | 318 bool AddressField::ParseCity(AutofillScanner* scanner, |
305 bool is_ecml, | 319 bool is_ecml, |
306 AddressField* address_field) { | 320 AddressField* address_field) { |
307 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | 321 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
308 // the term "town". | 322 // the term "town". |
309 if (address_field->city_) | 323 if (address_field->city_) |
310 return false; | 324 return false; |
311 | 325 |
312 string16 pattern; | 326 string16 pattern; |
// The pattern comes from the ECML ship-to/bill-to field names when is_ecml is
// set, and from the localized IDS_AUTOFILL_CITY_RE resource otherwise.
313 if (is_ecml) | 327 if (is_ecml) |
314 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); | 328 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); |
315 else | 329 else |
316 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); | 330 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); |
317 | 331 |
318 return ParseText(scanner, pattern, &address_field->city_); | 332 return ParseText(scanner, pattern, |
| 333 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 334 &address_field->city_); |
319 } | 335 } |
320 | 336 |
// Review notes (added; the OLD | NEW diff rows below are unchanged):
// ParseState returns false without attempting a match when state_ is already
// set. Otherwise it returns the result of ParseText, which receives
// &address_field->state_ as its destination.
// NEW passes an explicit match mask that includes MATCH_SELECT, presumably so
// that <select> elements can match; confirm against the MATCH_* definitions.
321 // static | 337 // static |
322 bool AddressField::ParseState(AutofillScanner* scanner, | 338 bool AddressField::ParseState(AutofillScanner* scanner, |
323 bool is_ecml, | 339 bool is_ecml, |
324 AddressField* address_field) { | 340 AddressField* address_field) { |
325 if (address_field->state_) | 341 if (address_field->state_) |
326 return false; | 342 return false; |
327 | 343 |
328 string16 pattern; | 344 string16 pattern; |
// The pattern comes from the ECML ship-to/bill-to field names when is_ecml is
// set, and from the localized IDS_AUTOFILL_STATE_RE resource otherwise.
329 if (is_ecml) | 345 if (is_ecml) |
330 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); | 346 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); |
331 else | 347 else |
332 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); | 348 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); |
333 | 349 |
334 return ParseText(scanner, pattern, &address_field->state_); | 350 return ParseText(scanner, pattern, |
| 351 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT, |
| 352 &address_field->state_); |
335 } | 353 } |
336 | 354 |
337 AddressType AddressField::AddressTypeFromText(const string16 &text) { | 355 AddressType AddressField::AddressTypeFromText(const string16 &text) { |
338 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) | 356 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) |
339 != string16::npos || | 357 != string16::npos || |
340 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) | 358 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) |
341 != string16::npos) | 359 != string16::npos) |
342 // This text could be a checkbox label such as "same as my billing | 360 // This text could be a checkbox label such as "same as my billing |
343 // address" or "use my shipping address". | 361 // address" or "use my shipping address". |
344 // ++ It would help if we generally skipped all text that appears | 362 // ++ It would help if we generally skipped all text that appears |
(...skipping 15 matching lines...) Expand all Loading... |
360 return kBillingAddress; | 378 return kBillingAddress; |
361 | 379 |
362 if (bill == string16::npos && ship != string16::npos) | 380 if (bill == string16::npos && ship != string16::npos) |
363 return kShippingAddress; | 381 return kShippingAddress; |
364 | 382 |
365 if (bill > ship) | 383 if (bill > ship) |
366 return kBillingAddress; | 384 return kBillingAddress; |
367 | 385 |
368 return kShippingAddress; | 386 return kShippingAddress; |
369 } | 387 } |
OLD | NEW |