Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Side by Side Diff: components/autofill/core/browser/address_field.cc

Issue 1518783002: Revert of autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/autofill.gypi ('k') | components/autofill/core/browser/autofill_regex_constants.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/core/browser/address_field.h" 5 #include "components/autofill/core/browser/address_field.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string16.h"
11 #include "base/strings/string_util.h" 12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
12 #include "components/autofill/core/browser/autofill_field.h" 14 #include "components/autofill/core/browser/autofill_field.h"
13 #include "components/autofill/core/browser/autofill_regex_constants.h" 15 #include "components/autofill/core/browser/autofill_regex_constants.h"
14 #include "components/autofill/core/browser/autofill_scanner.h" 16 #include "components/autofill/core/browser/autofill_scanner.h"
15 #include "components/autofill/core/browser/field_types.h" 17 #include "components/autofill/core/browser/field_types.h"
16 18
19 using base::UTF8ToUTF16;
20
17 namespace autofill { 21 namespace autofill {
18 22
19 namespace { 23 namespace {
20 24
21 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) { 25 bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) {
22 *field = scanner->Cursor(); 26 *field = scanner->Cursor();
23 scanner->Advance(); 27 scanner->Advance();
24 return true; 28 return true;
25 } 29 }
26 30
(...skipping 10 matching lines...) Expand all
37 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT; 41 const int AddressField::kStateMatchType = MATCH_DEFAULT | MATCH_SELECT;
38 42
39 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) { 43 scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) {
40 if (scanner->IsEnd()) 44 if (scanner->IsEnd())
41 return NULL; 45 return NULL;
42 46
43 scoped_ptr<AddressField> address_field(new AddressField); 47 scoped_ptr<AddressField> address_field(new AddressField);
44 const AutofillField* const initial_field = scanner->Cursor(); 48 const AutofillField* const initial_field = scanner->Cursor();
45 size_t saved_cursor = scanner->SaveCursor(); 49 size_t saved_cursor = scanner->SaveCursor();
46 50
51 base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe);
52 base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe);
53
47 // Allow address fields to appear in any order. 54 // Allow address fields to appear in any order.
48 size_t begin_trailing_non_labeled_fields = 0; 55 size_t begin_trailing_non_labeled_fields = 0;
49 bool has_trailing_non_labeled_fields = false; 56 bool has_trailing_non_labeled_fields = false;
50 while (!scanner->IsEnd()) { 57 while (!scanner->IsEnd()) {
51 const size_t cursor = scanner->SaveCursor(); 58 const size_t cursor = scanner->SaveCursor();
52 if (address_field->ParseAddressLines(scanner) || 59 if (address_field->ParseAddressLines(scanner) ||
53 address_field->ParseCityStateZipCode(scanner) || 60 address_field->ParseCityStateZipCode(scanner) ||
54 address_field->ParseCountry(scanner) || 61 address_field->ParseCountry(scanner) ||
55 address_field->ParseCompany(scanner)) { 62 address_field->ParseCompany(scanner)) {
56 has_trailing_non_labeled_fields = false; 63 has_trailing_non_labeled_fields = false;
57 continue; 64 continue;
58 } else if (ParseField(scanner, kAttentionIgnoredRe, NULL) || 65 } else if (ParseField(scanner, attention_ignored, NULL) ||
59 ParseField(scanner, kRegionIgnoredRe, NULL)) { 66 ParseField(scanner, region_ignored, NULL)) {
60 // We ignore the following: 67 // We ignore the following:
61 // * Attention. 68 // * Attention.
62 // * Province/Region/Other. 69 // * Province/Region/Other.
63 continue; 70 continue;
64 } else if (scanner->Cursor() != initial_field && 71 } else if (scanner->Cursor() != initial_field &&
65 ParseEmptyLabel(scanner, NULL)) { 72 ParseEmptyLabel(scanner, NULL)) {
66 // Ignore non-labeled fields within an address; the page 73 // Ignore non-labeled fields within an address; the page
67 // MapQuest Driving Directions North America.html contains such a field. 74 // MapQuest Driving Directions North America.html contains such a field.
68 // We only ignore such fields after we've parsed at least one other field; 75 // We only ignore such fields after we've parsed at least one other field;
69 // otherwise we'd effectively parse address fields before other field 76 // otherwise we'd effectively parse address fields before other field
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 AddClassification(city_, ADDRESS_HOME_CITY, map) && 141 AddClassification(city_, ADDRESS_HOME_CITY, map) &&
135 AddClassification(state_, ADDRESS_HOME_STATE, map) && 142 AddClassification(state_, ADDRESS_HOME_STATE, map) &&
136 AddClassification(zip_, ADDRESS_HOME_ZIP, map) && 143 AddClassification(zip_, ADDRESS_HOME_ZIP, map) &&
137 AddClassification(country_, ADDRESS_HOME_COUNTRY, map); 144 AddClassification(country_, ADDRESS_HOME_COUNTRY, map);
138 } 145 }
139 146
140 bool AddressField::ParseCompany(AutofillScanner* scanner) { 147 bool AddressField::ParseCompany(AutofillScanner* scanner) {
141 if (company_ && !company_->IsEmpty()) 148 if (company_ && !company_->IsEmpty())
142 return false; 149 return false;
143 150
144 return ParseField(scanner, kCompanyRe, &company_); 151 return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_);
145 } 152 }
146 153
147 bool AddressField::ParseAddressLines(AutofillScanner* scanner) { 154 bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
148 // We only match the string "address" in page text, not in element names, 155 // We only match the string "address" in page text, not in element names,
149 // because sometimes every element in a group of address fields will have 156 // because sometimes every element in a group of address fields will have
150 // a name containing the string "address"; for example, on the page 157 // a name containing the string "address"; for example, on the page
151 // Kohl's - Register Billing Address.html the text element labeled "city" 158 // Kohl's - Register Billing Address.html the text element labeled "city"
152 // has the name "BILL_TO_ADDRESS<>city". We do match address labels 159 // has the name "BILL_TO_ADDRESS<>city". We do match address labels
153 // such as "address1", which appear as element names on various pages (eg 160 // such as "address1", which appear as element names on various pages (eg
154 // AmericanGirl-Registration.html, BloomingdalesBilling.html, 161 // AmericanGirl-Registration.html, BloomingdalesBilling.html,
155 // EBay Registration Enter Information.html). 162 // EBay Registration Enter Information.html).
156 if (address1_ || street_address_) 163 if (address1_ || street_address_)
157 return false; 164 return false;
158 165
159 // Ignore "Address Lookup" field. http://crbug.com/427622 166 // Ignore "Address Lookup" field. http://crbug.com/427622
160 if (ParseField(scanner, kAddressLookupRe, NULL)) 167 if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL))
161 return false; 168 return false;
162 169
163 if (!ParseFieldSpecifics(scanner, kAddressLine1Re, MATCH_DEFAULT, 170 base::string16 pattern = UTF8ToUTF16(kAddressLine1Re);
171 base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe);
172 if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) &&
173 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
164 &address1_) && 174 &address1_) &&
165 !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, 175 !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA,
166 MATCH_LABEL | MATCH_TEXT, &address1_) && 176 &street_address_) &&
167 !ParseFieldSpecifics(scanner, kAddressLine1Re, 177 !ParseFieldSpecifics(scanner, label_pattern,
168 MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) && 178 MATCH_LABEL | MATCH_TEXT_AREA,
169 !ParseFieldSpecifics(scanner, kAddressLine1LabelRe, 179 &street_address_))
170 MATCH_LABEL | MATCH_TEXT_AREA, &street_address_))
171 return false; 180 return false;
172 181
173 if (street_address_) 182 if (street_address_)
174 return true; 183 return true;
175 184
176 // This code may not pick up pages that have an address field consisting of a 185 // This code may not pick up pages that have an address field consisting of a
177 // sequence of unlabeled address fields. If we need to add this, see 186 // sequence of unlabeled address fields. If we need to add this, see
178 // discussion on https://codereview.chromium.org/741493003/ 187 // discussion on https://codereview.chromium.org/741493003/
179 if (!ParseField(scanner, kAddressLine2Re, &address2_) && 188 pattern = UTF8ToUTF16(kAddressLine2Re);
180 !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, 189 label_pattern = UTF8ToUTF16(kAddressLine2LabelRe);
181 MATCH_LABEL | MATCH_TEXT, &address2_)) 190 if (!ParseField(scanner, pattern, &address2_) &&
191 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
192 &address2_))
182 return true; 193 return true;
183 194
184 // Optionally parse address line 3. This uses the same label regexp as 195 // Optionally parse address line 3. This uses the same label regexp as
185 // address 2 above. 196 // address 2 above.
186 if (!ParseField(scanner, kAddressLinesExtraRe, &address3_) && 197 pattern = UTF8ToUTF16(kAddressLinesExtraRe);
187 !ParseFieldSpecifics(scanner, kAddressLine2LabelRe, 198 if (!ParseField(scanner, pattern, &address3_) &&
188 MATCH_LABEL | MATCH_TEXT, &address3_)) 199 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
200 &address3_))
189 return true; 201 return true;
190 202
191 // Try for surplus lines, which we will promptly discard. Some pages have 4 203 // Try for surplus lines, which we will promptly discard. Some pages have 4
192 // address lines (e.g. uk/ShoesDirect2.html)! 204 // address lines (e.g. uk/ShoesDirect2.html)!
193 // 205 //
194 // Since these are rare, don't bother considering unlabeled lines as extra 206 // Since these are rare, don't bother considering unlabeled lines as extra
195 // address lines. 207 // address lines.
196 while (ParseField(scanner, kAddressLinesExtraRe, NULL)) { 208 pattern = UTF8ToUTF16(kAddressLinesExtraRe);
209 while (ParseField(scanner, pattern, NULL)) {
197 // Consumed a surplus line, try for another. 210 // Consumed a surplus line, try for another.
198 } 211 }
199 return true; 212 return true;
200 } 213 }
201 214
202 bool AddressField::ParseCountry(AutofillScanner* scanner) { 215 bool AddressField::ParseCountry(AutofillScanner* scanner) {
203 if (country_ && !country_->IsEmpty()) 216 if (country_ && !country_->IsEmpty())
204 return false; 217 return false;
205 218
206 scanner->SaveCursor(); 219 scanner->SaveCursor();
207 if (ParseFieldSpecifics(scanner, kCountryRe, MATCH_DEFAULT | MATCH_SELECT, 220 if (ParseFieldSpecifics(scanner,
221 UTF8ToUTF16(kCountryRe),
222 MATCH_DEFAULT | MATCH_SELECT,
208 &country_)) { 223 &country_)) {
209 return true; 224 return true;
210 } 225 }
211 226
212 // The occasional page (e.g. google account registration page) calls this a 227 // The occasional page (e.g. google account registration page) calls this a
213 // "location". However, this only makes sense for select tags. 228 // "location". However, this only makes sense for select tags.
214 scanner->Rewind(); 229 scanner->Rewind();
215 return ParseFieldSpecifics(scanner, kCountryLocationRe, 230 return ParseFieldSpecifics(scanner,
231 UTF8ToUTF16(kCountryLocationRe),
216 MATCH_LABEL | MATCH_NAME | MATCH_SELECT, 232 MATCH_LABEL | MATCH_NAME | MATCH_SELECT,
217 &country_); 233 &country_);
218 } 234 }
219 235
220 bool AddressField::ParseZipCode(AutofillScanner* scanner) { 236 bool AddressField::ParseZipCode(AutofillScanner* scanner) {
221 if (zip_) 237 if (zip_)
222 return false; 238 return false;
223 239
224 if (!ParseFieldSpecifics(scanner, kZipCodeRe, kZipCodeMatchType, &zip_)) { 240 if (!ParseFieldSpecifics(scanner,
241 UTF8ToUTF16(kZipCodeRe),
242 kZipCodeMatchType,
243 &zip_)) {
225 return false; 244 return false;
226 } 245 }
227 246
228 // Look for a zip+4, whose field name will also often contain 247 // Look for a zip+4, whose field name will also often contain
229 // the substring "zip". 248 // the substring "zip".
230 ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); 249 ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_);
231 return true; 250 return true;
232 } 251 }
233 252
234 bool AddressField::ParseCity(AutofillScanner* scanner) { 253 bool AddressField::ParseCity(AutofillScanner* scanner) {
235 if (city_) 254 if (city_)
236 return false; 255 return false;
237 256
238 return ParseFieldSpecifics(scanner, kCityRe, kCityMatchType, &city_); 257 return ParseFieldSpecifics(scanner,
258 UTF8ToUTF16(kCityRe),
259 kCityMatchType,
260 &city_);
239 } 261 }
240 262
241 bool AddressField::ParseState(AutofillScanner* scanner) { 263 bool AddressField::ParseState(AutofillScanner* scanner) {
242 if (state_) 264 if (state_)
243 return false; 265 return false;
244 266
245 // Ignore spurious matches for "United States". 267 return ParseFieldSpecifics(scanner,
246 size_t saved_cursor = scanner->SaveCursor(); 268 UTF8ToUTF16(kStateRe),
247 if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { 269 kStateMatchType,
248 scanner->RewindTo(saved_cursor); 270 &state_);
249 return false;
250 }
251
252 return ParseFieldSpecifics(scanner, kStateRe, kStateMatchType, &state_);
253 } 271 }
254 272
255 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { 273 bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) {
256 // Simple cases. 274 // Simple cases.
257 if (scanner->IsEnd()) 275 if (scanner->IsEnd())
258 return false; 276 return false;
259 if (city_ && state_ && zip_) 277 if (city_ && state_ && zip_)
260 return false; 278 return false;
261 if (state_ && zip_) 279 if (state_ && zip_)
262 return ParseCity(scanner); 280 return ParseCity(scanner);
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
304 322
305 return false; 323 return false;
306 } 324 }
307 325
308 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( 326 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
309 AutofillScanner* scanner) { 327 AutofillScanner* scanner) {
310 if (zip_) 328 if (zip_)
311 return RESULT_MATCH_NONE; 329 return RESULT_MATCH_NONE;
312 330
313 ParseNameLabelResult result = ParseNameAndLabelSeparately( 331 ParseNameLabelResult result = ParseNameAndLabelSeparately(
314 scanner, kZipCodeRe, kZipCodeMatchType, &zip_); 332 scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_);
315 333
316 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) 334 if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd())
317 return result; 335 return result;
318 336
319 size_t saved_cursor = scanner->SaveCursor(); 337 size_t saved_cursor = scanner->SaveCursor();
320 bool found_non_zip4 = ParseCity(scanner); 338 bool found_non_zip4 = ParseCity(scanner);
321 if (found_non_zip4) 339 if (found_non_zip4)
322 city_ = nullptr; 340 city_ = nullptr;
323 scanner->RewindTo(saved_cursor); 341 scanner->RewindTo(saved_cursor);
324 if (!found_non_zip4) { 342 if (!found_non_zip4) {
325 found_non_zip4 = ParseState(scanner); 343 found_non_zip4 = ParseState(scanner);
326 if (found_non_zip4) 344 if (found_non_zip4)
327 state_ = nullptr; 345 state_ = nullptr;
328 scanner->RewindTo(saved_cursor); 346 scanner->RewindTo(saved_cursor);
329 } 347 }
330 348
331 if (!found_non_zip4) { 349 if (!found_non_zip4) {
332 // Look for a zip+4, whose field name will also often contain 350 // Look for a zip+4, whose field name will also often contain
333 // the substring "zip". 351 // the substring "zip".
334 ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_); 352 ParseFieldSpecifics(scanner,
353 UTF8ToUTF16(kZip4Re),
354 kZipCodeMatchType,
355 &zip4_);
335 } 356 }
336 return result; 357 return result;
337 } 358 }
338 359
339 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( 360 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity(
340 AutofillScanner* scanner) { 361 AutofillScanner* scanner) {
341 if (city_) 362 if (city_)
342 return RESULT_MATCH_NONE; 363 return RESULT_MATCH_NONE;
343 364
344 return ParseNameAndLabelSeparately(scanner, kCityRe, kCityMatchType, &city_); 365 return ParseNameAndLabelSeparately(
366 scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_);
345 } 367 }
346 368
347 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( 369 AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState(
348 AutofillScanner* scanner) { 370 AutofillScanner* scanner) {
349 if (state_) 371 if (state_)
350 return RESULT_MATCH_NONE; 372 return RESULT_MATCH_NONE;
351 373
352 size_t saved_cursor = scanner->SaveCursor(); 374 return ParseNameAndLabelSeparately(
353 if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) { 375 scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_);
354 scanner->RewindTo(saved_cursor);
355 return RESULT_MATCH_NONE;
356 }
357
358 return ParseNameAndLabelSeparately(scanner, kStateRe, kStateMatchType,
359 &state_);
360 } 376 }
361 377
362 } // namespace autofill 378 } // namespace autofill
OLDNEW
« no previous file with comments | « components/autofill.gypi ('k') | components/autofill/core/browser/autofill_regex_constants.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698