Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1048)

Side by Side Diff: chrome/browser/autofill/address_field.cc

Issue 7014011: Change heuristic regex and order to match grabber-continental. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Move IsTextInput() check. Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | chrome/browser/autofill/credit_card_field.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autofill/address_field.h" 5 #include "chrome/browser/autofill/address_field.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
78 78
79 // Allow address fields to appear in any order. 79 // Allow address fields to appear in any order.
80 while (!scanner->IsEnd()) { 80 while (!scanner->IsEnd()) {
81 if (ParseCompany(scanner, is_ecml, address_field.get()) || 81 if (ParseCompany(scanner, is_ecml, address_field.get()) ||
82 ParseAddressLines(scanner, is_ecml, address_field.get()) || 82 ParseAddressLines(scanner, is_ecml, address_field.get()) ||
83 ParseCity(scanner, is_ecml, address_field.get()) || 83 ParseCity(scanner, is_ecml, address_field.get()) ||
84 ParseState(scanner, is_ecml, address_field.get()) || 84 ParseState(scanner, is_ecml, address_field.get()) ||
85 ParseZipCode(scanner, is_ecml, address_field.get()) || 85 ParseZipCode(scanner, is_ecml, address_field.get()) ||
86 ParseCountry(scanner, is_ecml, address_field.get())) { 86 ParseCountry(scanner, is_ecml, address_field.get())) {
87 continue; 87 continue;
88 } else if (ParseText(scanner, attention_ignored) || 88 } else if (ParseText(scanner, attention_ignored,
89 ParseText(scanner, region_ignored)) { 89 MATCH_NAME | MATCH_LABEL | MATCH_TEXT) ||
90 ParseText(scanner, region_ignored,
91 MATCH_NAME | MATCH_LABEL | MATCH_TEXT)) {
90 // We ignore the following: 92 // We ignore the following:
91 // * Attention. 93 // * Attention.
92 // * Province/Region/Other. 94 // * Province/Region/Other.
93 continue; 95 continue;
94 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) { 96 } else if (scanner->Cursor() != initial_field && ParseEmpty(scanner)) {
95 // Ignore non-labeled fields within an address; the page 97 // Ignore non-labeled fields within an address; the page
96 // MapQuest Driving Directions North America.html contains such a field. 98 // MapQuest Driving Directions North America.html contains such a field.
97 // We only ignore such fields after we've parsed at least one other field; 99 // We only ignore such fields after we've parsed at least one other field;
98 // otherwise we'd effectively parse address fields before other field 100 // otherwise we'd effectively parse address fields before other field
99 // types after any non-labeled fields, and we want email address fields to 101 // types after any non-labeled fields, and we want email address fields to
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
159 return false; 161 return false;
160 162
161 string16 pattern; 163 string16 pattern;
162 if (is_ecml) { 164 if (is_ecml) {
163 pattern = GetEcmlPattern(kEcmlShipToCompanyName, 165 pattern = GetEcmlPattern(kEcmlShipToCompanyName,
164 kEcmlBillToCompanyName, '|'); 166 kEcmlBillToCompanyName, '|');
165 } else { 167 } else {
166 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); 168 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE);
167 } 169 }
168 170
169 return ParseText(scanner, pattern, &address_field->company_); 171 return ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT,
172 &address_field->company_);
170 } 173 }
171 174
172 // static 175 // static
173 bool AddressField::ParseAddressLines(AutofillScanner* scanner, 176 bool AddressField::ParseAddressLines(AutofillScanner* scanner,
174 bool is_ecml, 177 bool is_ecml,
175 AddressField* address_field) { 178 AddressField* address_field) {
176 // We only match the string "address" in page text, not in element names, 179 // We only match the string "address" in page text, not in element names,
177 // because sometimes every element in a group of address fields will have 180 // because sometimes every element in a group of address fields will have
178 // a name containing the string "address"; for example, on the page 181 // a name containing the string "address"; for example, on the page
179 // Kohl's - Register Billing Address.html the text element labeled "city" 182 // Kohl's - Register Billing Address.html the text element labeled "city"
180 // has the name "BILL_TO_ADDRESS<>city". We do match address labels 183 // has the name "BILL_TO_ADDRESS<>city". We do match address labels
181 // such as "address1", which appear as element names on various pages (eg 184 // such as "address1", which appear as element names on various pages (eg
182 // AmericanGirl-Registration.html, BloomingdalesBilling.html, 185 // AmericanGirl-Registration.html, BloomingdalesBilling.html,
183 // EBay Registration Enter Information.html). 186 // EBay Registration Enter Information.html).
184 if (address_field->address1_) 187 if (address_field->address1_)
185 return false; 188 return false;
186 189
187 string16 pattern; 190 string16 pattern;
188 if (is_ecml) { 191 if (is_ecml) {
189 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); 192 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|');
190 if (!ParseText(scanner, pattern, &address_field->address1_)) 193 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT,
194 &address_field->address1_))
191 return false; 195 return false;
192 } else { 196 } else {
193 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); 197 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE);
194 string16 label_pattern = 198 string16 label_pattern =
195 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); 199 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
196 200
197 if (!ParseText(scanner, pattern, &address_field->address1_) && 201 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT,
198 !ParseLabelText(scanner, label_pattern, &address_field->address1_)) 202 &address_field->address1_) &&
203 !ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
204 &address_field->address1_))
199 return false; 205 return false;
200 } 206 }
201 207
202 // Optionally parse more address lines, which may have empty labels. 208 // Optionally parse more address lines, which may have empty labels.
203 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) 209 // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
204 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! 210 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
205 if (is_ecml) { 211 if (is_ecml) {
206 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); 212 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|');
207 if (!ParseEmptyText(scanner, &address_field->address2_)) 213 if (!ParseEmptyText(scanner, &address_field->address2_))
208 ParseText(scanner, pattern, &address_field->address2_); 214 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT,
215 &address_field->address2_);
209 } else { 216 } else {
210 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); 217 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE);
211 string16 label_pattern = 218 string16 label_pattern =
212 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); 219 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
213 if (!ParseEmptyText(scanner, &address_field->address2_) && 220 if (!ParseEmptyText(scanner, &address_field->address2_) &&
214 !ParseText(scanner, pattern, &address_field->address2_)) 221 !ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT,
215 ParseLabelText(scanner, label_pattern, &address_field->address2_); 222 &address_field->address2_))
223 ParseText(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
224 &address_field->address2_);
216 } 225 }
217 226
218 // Try for a third line, which we will promptly discard. 227 // Try for a third line, which we will promptly discard.
219 if (address_field->address2_ != NULL) { 228 if (address_field->address2_ != NULL) {
220 if (is_ecml) { 229 if (is_ecml) {
221 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); 230 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|');
222 ParseText(scanner, pattern); 231 ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT);
223 } else { 232 } else {
224 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); 233 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE);
225 if (!ParseEmptyText(scanner, NULL)) 234 if (!ParseEmptyText(scanner, NULL))
226 ParseText(scanner, pattern, NULL); 235 ParseText(scanner, pattern,
236 MATCH_NAME | MATCH_LABEL | MATCH_TEXT, NULL);
227 } 237 }
228 } 238 }
229 239
230 return true; 240 return true;
231 } 241 }
232 242
233 // static 243 // static
234 bool AddressField::ParseCountry(AutofillScanner* scanner, 244 bool AddressField::ParseCountry(AutofillScanner* scanner,
235 bool is_ecml, 245 bool is_ecml,
236 AddressField* address_field) { 246 AddressField* address_field) {
237 // Parse a country. The occasional page (e.g. 247 // Parse a country. The occasional page (e.g.
238 // Travelocity_New Member Information1.html) calls this a "location". 248 // Travelocity_New Member Information1.html) calls this a "location".
239 // Note: ECML standard uses 2 letter country code (ISO 3166) 249 // Note: ECML standard uses 2 letter country code (ISO 3166)
240 if (address_field->country_ && !address_field->country_->IsEmpty()) 250 if (address_field->country_ && !address_field->country_->IsEmpty())
241 return false; 251 return false;
242 252
243 string16 pattern; 253 string16 pattern;
244 if (is_ecml) 254 if (is_ecml)
245 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); 255 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|');
246 else 256 else
247 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); 257 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE);
248 258
249 return ParseText(scanner, pattern, &address_field->country_); 259 return ParseText(scanner, pattern,
260 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT,
261 &address_field->country_);
250 } 262 }
251 263
252 // static 264 // static
253 bool AddressField::ParseZipCode(AutofillScanner* scanner, 265 bool AddressField::ParseZipCode(AutofillScanner* scanner,
254 bool is_ecml, 266 bool is_ecml,
255 AddressField* address_field) { 267 AddressField* address_field) {
256 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this 268 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this
257 // is called a "post code". 269 // is called a "post code".
258 // 270 //
259 // HACK: Just for the MapQuest driving directions page we match the 271 // HACK: Just for the MapQuest driving directions page we match the
(...skipping 18 matching lines...) Expand all
278 // more detail. 290 // more detail.
279 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); 291 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode));
280 if (StartsWith(name, bill_to_postal_code_field, false)) { 292 if (StartsWith(name, bill_to_postal_code_field, false)) {
281 tempType = kBillingAddress; 293 tempType = kBillingAddress;
282 } else if (StartsWith(name, bill_to_postal_code_field, false)) { 294 } else if (StartsWith(name, bill_to_postal_code_field, false)) {
283 tempType = kShippingAddress; 295 tempType = kShippingAddress;
284 } else { 296 } else {
285 tempType = kGenericAddress; 297 tempType = kGenericAddress;
286 } 298 }
287 299
288 if (!ParseText(scanner, pattern, &address_field->zip_)) 300 if (!ParseText(scanner, pattern, MATCH_NAME | MATCH_LABEL | MATCH_TEXT,
301 &address_field->zip_))
289 return false; 302 return false;
290 303
291 address_field->type_ = tempType; 304 address_field->type_ = tempType;
292 if (!is_ecml) { 305 if (!is_ecml) {
293 // Look for a zip+4, whose field name will also often contain 306 // Look for a zip+4, whose field name will also often contain
294 // the substring "zip". 307 // the substring "zip".
295 ParseText(scanner, 308 ParseText(scanner,
296 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), 309 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE),
310 MATCH_NAME | MATCH_LABEL | MATCH_TEXT,
297 &address_field->zip4_); 311 &address_field->zip4_);
298 } 312 }
299 313
300 return true; 314 return true;
301 } 315 }
302 316
303 // static 317 // static
304 bool AddressField::ParseCity(AutofillScanner* scanner, 318 bool AddressField::ParseCity(AutofillScanner* scanner,
305 bool is_ecml, 319 bool is_ecml,
306 AddressField* address_field) { 320 AddressField* address_field) {
307 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use 321 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use
308 // the term "town". 322 // the term "town".
309 if (address_field->city_) 323 if (address_field->city_)
310 return false; 324 return false;
311 325
312 string16 pattern; 326 string16 pattern;
313 if (is_ecml) 327 if (is_ecml)
314 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); 328 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|');
315 else 329 else
316 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); 330 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE);
317 331
318 return ParseText(scanner, pattern, &address_field->city_); 332 return ParseText(scanner, pattern,
333 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT,
334 &address_field->city_);
319 } 335 }
320 336
321 // static 337 // static
322 bool AddressField::ParseState(AutofillScanner* scanner, 338 bool AddressField::ParseState(AutofillScanner* scanner,
323 bool is_ecml, 339 bool is_ecml,
324 AddressField* address_field) { 340 AddressField* address_field) {
325 if (address_field->state_) 341 if (address_field->state_)
326 return false; 342 return false;
327 343
328 string16 pattern; 344 string16 pattern;
329 if (is_ecml) 345 if (is_ecml)
330 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); 346 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|');
331 else 347 else
332 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); 348 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE);
333 349
334 return ParseText(scanner, pattern, &address_field->state_); 350 return ParseText(scanner, pattern,
351 MATCH_NAME | MATCH_LABEL | MATCH_TEXT | MATCH_SELECT,
352 &address_field->state_);
335 } 353 }
336 354
337 AddressType AddressField::AddressTypeFromText(const string16 &text) { 355 AddressType AddressField::AddressTypeFromText(const string16 &text) {
338 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) 356 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE))
339 != string16::npos || 357 != string16::npos ||
340 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) 358 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE))
341 != string16::npos) 359 != string16::npos)
342 // This text could be a checkbox label such as "same as my billing 360 // This text could be a checkbox label such as "same as my billing
343 // address" or "use my shipping address". 361 // address" or "use my shipping address".
344 // ++ It would help if we generally skipped all text that appears 362 // ++ It would help if we generally skipped all text that appears
(...skipping 15 matching lines...) Expand all
360 return kBillingAddress; 378 return kBillingAddress;
361 379
362 if (bill == string16::npos && ship != string16::npos) 380 if (bill == string16::npos && ship != string16::npos)
363 return kShippingAddress; 381 return kShippingAddress;
364 382
365 if (bill > ship) 383 if (bill > ship)
366 return kBillingAddress; 384 return kBillingAddress;
367 385
368 return kShippingAddress; 386 return kShippingAddress;
369 } 387 }
OLDNEW
« no previous file with comments | « no previous file | chrome/browser/autofill/credit_card_field.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698