Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(247)

Side by Side Diff: chrome/browser/autofill/credit_card_field.cc

Issue 7891020: Make autofill regular expressions unicode again. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: remove extra gyp change Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autofill/credit_card_field.h" 5 #include "chrome/browser/autofill/credit_card_field.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
11 #include "base/string16.h" 11 #include "base/string16.h"
12 #include "base/string_util.h" 12 #include "base/string_util.h"
13 #include "base/utf_string_conversions.h" 13 #include "base/utf_string_conversions.h"
14 #include "chrome/browser/autofill/autofill_field.h" 14 #include "chrome/browser/autofill/autofill_field.h"
15 #include "chrome/browser/autofill/autofill_regex_constants.h"
15 #include "chrome/browser/autofill/autofill_scanner.h" 16 #include "chrome/browser/autofill/autofill_scanner.h"
16 #include "chrome/browser/autofill/field_types.h" 17 #include "chrome/browser/autofill/field_types.h"
17 #include "ui/base/l10n/l10n_util.h" 18 #include "ui/base/l10n/l10n_util.h"
18 19
19 namespace {
20
21 // The UTF-8 versions of these regular expressions are in
22 // regular_expressions.txt.
23 const char kNameOnCardRe[] =
24 "card.?holder|name.?on.?card|ccname|ccfullname|owner"
25 // de-DE
26 "|karteninhaber"
27 // es
28 "|nombre.*tarjeta"
29 // fr-FR
30 "|nom.*carte"
31 // it-IT
32 "|nome.*cart"
33 // ja-JP
34 "|\xe5\x90\x8d\xe5\x89\x8d"
35 // ru
36 "|\xd0\x98\xd0\xbc\xd1\x8f.*\xd0\xba\xd0\xb0\xd1\x80\xd1\x82\xd1\x8b"
37 // zh-CN
38 "|\xe4\xbf\xa1\xe7\x94\xa8\xe5\x8d\xa1\xe5\xbc\x80\xe6\x88\xb7\xe5\x90\x8d"
39 "|\xe5\xbc\x80\xe6\x88\xb7\xe5\x90\x8d|\xe6\x8c\x81\xe5\x8d\xa1\xe4"
40 "\xba\xba\xe5\xa7\x93\xe5\x90\x8d"
41 // zh-TW
42 "|\xe6\x8c\x81\xe5\x8d\xa1\xe4\xba\xba\xe5\xa7\x93\xe5\x90\x8d";
43 const char kNameOnCardContextualRe[] =
44 "name";
45 const char kCardNumberRe[] =
46 "card.?number|card.?#|card.?no|ccnum|acctnum"
47 // de-DE
48 "|nummer"
49 // es
50 "|credito|numero|n\xc3\xbamero"
51 // fr-FR
52 "|num\xc3\xa9ro"
53 // ja-JP
54 "|\xe3\x82\xab\xe3\x83\xbc\xe3\x83\x89\xe7\x95\xaa\xe5\x8f\xb7"
55 // ru
56 "|\xd0\x9d\xd0\xbe\xd0\xbc\xd0\xb5\xd1\x80.*\xd0\xba\xd0\xb0\xd1\x80\xd1"
57 "\x82\xd1\x8b"
58 // zh-CN
59 "|\xe4\xbf\xa1\xe7\x94\xa8\xe5\x8d\xa1\xe5\x8f\xb7|\xe4\xbf\xa1\xe7\x94"
60 "\xa8\xe5\x8d\xa1\xe5\x8f\xb7\xe7\xa0\x81"
61 // zh-TW
62 "|\xe4\xbf\xa1\xe7\x94\xa8\xe5\x8d\xa1\xe5\x8d\xa1\xe8\x99\x9f"
63 // ko-KR
64 "|\xec\xb9\xb4\xeb\x93\x9c";
65 const char kCardCvcRe[] =
66 "verification|card identification|security code|cvn|cvv|cvc|csc";
67
68 // "Expiration date" is the most common label here, but some pages have
69 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look
70 // for the field names ccmonth and ccyear, which appear on at least 4 of
71 // our test pages.
72
73 // On at least one page (The China Shop2.html) we find only the labels
74 // "month" and "year". So for now we match these words directly; we'll
75 // see if this turns out to be too general.
76
77 // Toolbar Bug 51451: indeed, simply matching "month" is too general for
78 // https://rps.fidelity.com/ftgw/rps/RtlCust/CreatePIN/Init.
79 // Instead, we match only words beginning with "month".
80 const char kExpirationMonthRe[] =
81 "expir|exp.*mo|exp.*date|ccmonth"
82 // de-DE
83 "|gueltig|g\xc3\xbcltig|monat"
84 // es
85 "|fecha"
86 // fr-FR
87 "|date.*exp"
88 // it-IT
89 "|scadenza"
90 // ja-JP
91 "|\xe6\x9c\x89\xe5\x8a\xb9\xe6\x9c\x9f\xe9\x99\x90"
92 // pt-BR, pt-PT
93 "|validade"
94 // ru
95 "|\xd0\xa1\xd1\x80\xd0\xbe\xd0\xba \xd0\xb4\xd0\xb5\xd0\xb9\xd1\x81\xd1"
96 "\x82\xd0\xb2\xd0\xb8\xd1\x8f \xd0\xba\xd0\xb0\xd1\x80\xd1\x82\xd1\x8b"
97 // zh-CN
98 "|\xe6\x9c\x88";
99 const char kExpirationYearRe[] =
100 "exp|^/|year"
101 // de-DE
102 "|ablaufdatum|gueltig|g\xc3\xbcltig|yahr"
103 // es
104 "|fecha"
105 // it-IT
106 "|scadenza"
107 // ja-JP
108 "|\xe6\x9c\x89\xe5\x8a\xb9\xe6\x9c\x9f\xe9\x99\x90"
109 // pt-BR, pt-PT
110 "|validade"
111 // ru
112 "|\xd0\xa1\xd1\x80\xd0\xbe\xd0\xba \xd0\xb4\xd0\xb5\xd0\xb9\xd1\x81\xd1"
113 "\x82\xd0\xb2\xd0\xb8\xd1\x8f \xd0\xba\xd0\xb0\xd1\x80\xd1\x82\xd1\x8b"
114 // zh-CN
115 "|\xe5\xb9\xb4|\xe6\x9c\x89\xe6\x95\x88\xe6\x9c\x9f";
116
117 // This regex is a little bit nasty, but it is simply requiring exactly two
118 // adjacent y's.
119 const char kExpirationDate2DigitYearRe[] =
120 "exp.*date.*[^y]yy([^y]|$)";
121 const char kExpirationDateRe[] =
122 "expir|exp.*date"
123 // de-DE
124 "|gueltig|g\xc3\xbcltig"
125 // es
126 "|fecha"
127 // fr-FR
128 "|date.*exp"
129 // it-IT
130 "|scadenza"
131 // ja-JP
132 "|\xe6\x9c\x89\xe5\x8a\xb9\xe6\x9c\x9f\xe9\x99\x90"
133 // pt-BR, pt-PT
134 "|validade"
135 // ru
136 "|\xd0\xa1\xd1\x80\xd0\xbe\xd0\xba \xd0\xb4\xd0\xb5\xd0\xb9\xd1\x81\xd1"
137 "\x82\xd0\xb2\xd0\xb8\xd1\x8f\xd0\xba\xd0\xb0\xd1\x80\xd1\x82\xd1\x8b";
138 const char kCardIgnoredRe[] =
139 "^card";
140
141 } // namespace
142
143 // static 20 // static
144 FormField* CreditCardField::Parse(AutofillScanner* scanner) { 21 FormField* CreditCardField::Parse(AutofillScanner* scanner) {
145 if (scanner->IsEnd()) 22 if (scanner->IsEnd())
146 return NULL; 23 return NULL;
147 24
148 scoped_ptr<CreditCardField> credit_card_field(new CreditCardField); 25 scoped_ptr<CreditCardField> credit_card_field(new CreditCardField);
149 size_t saved_cursor = scanner->SaveCursor(); 26 size_t saved_cursor = scanner->SaveCursor();
150 27
151 // Credit card fields can appear in many different orders. 28 // Credit card fields can appear in many different orders.
152 // We loop until no more credit card related fields are found, see |break| at 29 // We loop until no more credit card related fields are found, see |break| at
153 // bottom of the loop. 30 // bottom of the loop.
154 for (int fields = 0; !scanner->IsEnd(); ++fields) { 31 for (int fields = 0; !scanner->IsEnd(); ++fields) {
155 // Sometimes the cardholder field is just labeled "name". Unfortunately this 32 // Sometimes the cardholder field is just labeled "name". Unfortunately this
156 // is a dangerously generic word to search for, since it will often match a 33 // is a dangerously generic word to search for, since it will often match a
157 // name (not cardholder name) field before or after credit card fields. So 34 // name (not cardholder name) field before or after credit card fields. So
158 // we search for "name" only when we've already parsed at least one other 35 // we search for "name" only when we've already parsed at least one other
159 // credit card field and haven't yet parsed the expiration date (which 36 // credit card field and haven't yet parsed the expiration date (which
160 // usually appears at the end). 37 // usually appears at the end).
161 if (credit_card_field->cardholder_ == NULL) { 38 if (credit_card_field->cardholder_ == NULL) {
162 string16 name_pattern; 39 string16 name_pattern;
163 if (fields == 0 || credit_card_field->expiration_month_) { 40 if (fields == 0 || credit_card_field->expiration_month_) {
164 // at beginning or end 41 // at beginning or end
165 name_pattern = UTF8ToUTF16(kNameOnCardRe); 42 name_pattern = UTF8ToUTF16(autofill::kNameOnCardRe);
166 } else { 43 } else {
167 name_pattern = UTF8ToUTF16(kNameOnCardContextualRe); 44 name_pattern = UTF8ToUTF16(autofill::kNameOnCardContextualRe);
168 } 45 }
169 46
170 if (ParseField(scanner, name_pattern, &credit_card_field->cardholder_)) 47 if (ParseField(scanner, name_pattern, &credit_card_field->cardholder_))
171 continue; 48 continue;
172 49
173 // As a hard-coded hack for Expedia's billing pages (expedia_checkout.html 50 // As a hard-coded hack for Expedia's billing pages (expedia_checkout.html
174 // and ExpediaBilling.html in our test suite), recognize separate fields 51 // and ExpediaBilling.html in our test suite), recognize separate fields
175 // for the cardholder's first and last name if they have the labels "cfnm" 52 // for the cardholder's first and last name if they have the labels "cfnm"
176 // and "clnm". 53 // and "clnm".
177 scanner->SaveCursor(); 54 scanner->SaveCursor();
178 const AutofillField* first; 55 const AutofillField* first;
179 if (ParseField(scanner, ASCIIToUTF16("^cfnm"), &first) && 56 if (ParseField(scanner, ASCIIToUTF16("^cfnm"), &first) &&
180 ParseField(scanner, ASCIIToUTF16("^clnm"), 57 ParseField(scanner, ASCIIToUTF16("^clnm"),
181 &credit_card_field->cardholder_last_)) { 58 &credit_card_field->cardholder_last_)) {
182 credit_card_field->cardholder_ = first; 59 credit_card_field->cardholder_ = first;
183 continue; 60 continue;
184 } 61 }
185 scanner->Rewind(); 62 scanner->Rewind();
186 } 63 }
187 64
188 // We look for a card security code before we look for a credit 65 // We look for a card security code before we look for a credit
189 // card number and match the general term "number". The security code 66 // card number and match the general term "number". The security code
190 // has a plethora of names; we've seen "verification #", 67 // has a plethora of names; we've seen "verification #",
191 // "verification number", "card identification number" and others listed 68 // "verification number", "card identification number" and others listed
192 // in the |pattern| below. 69 // in the |pattern| below.
193 string16 pattern = UTF8ToUTF16(kCardCvcRe); 70 string16 pattern = UTF8ToUTF16(autofill::kCardCvcRe);
194 if (!credit_card_field->verification_ && 71 if (!credit_card_field->verification_ &&
195 ParseField(scanner, pattern, &credit_card_field->verification_)) { 72 ParseField(scanner, pattern, &credit_card_field->verification_)) {
196 continue; 73 continue;
197 } 74 }
198 // TODO(jhawkins): Parse the type select control. 75 // TODO(jhawkins): Parse the type select control.
199 76
200 pattern = UTF8ToUTF16(kCardNumberRe); 77 pattern = UTF8ToUTF16(autofill::kCardNumberRe);
201 if (!credit_card_field->number_ && 78 if (!credit_card_field->number_ &&
202 ParseField(scanner, pattern, &credit_card_field->number_)) { 79 ParseField(scanner, pattern, &credit_card_field->number_)) {
203 continue; 80 continue;
204 } 81 }
205 82
206 if (LowerCaseEqualsASCII(scanner->Cursor()->form_control_type, "month")) { 83 if (LowerCaseEqualsASCII(scanner->Cursor()->form_control_type, "month")) {
207 credit_card_field->expiration_month_ = scanner->Cursor(); 84 credit_card_field->expiration_month_ = scanner->Cursor();
208 scanner->Advance(); 85 scanner->Advance();
209 } else { 86 } else {
210 // First try to parse split month/year expiration fields. 87 // First try to parse split month/year expiration fields.
211 scanner->SaveCursor(); 88 scanner->SaveCursor();
212 pattern = UTF8ToUTF16(kExpirationMonthRe); 89 pattern = UTF8ToUTF16(autofill::kExpirationMonthRe);
213 if (!credit_card_field->expiration_month_ && 90 if (!credit_card_field->expiration_month_ &&
214 ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, 91 ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT,
215 &credit_card_field->expiration_month_)) { 92 &credit_card_field->expiration_month_)) {
216 pattern = UTF8ToUTF16(kExpirationYearRe); 93 pattern = UTF8ToUTF16(autofill::kExpirationYearRe);
217 if (ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT, 94 if (ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT,
218 &credit_card_field->expiration_year_)) { 95 &credit_card_field->expiration_year_)) {
219 continue; 96 continue;
220 } 97 }
221 } 98 }
222 99
223 // If that fails, try to parse a combined expiration field. 100 // If that fails, try to parse a combined expiration field.
224 if (!credit_card_field->expiration_date_) { 101 if (!credit_card_field->expiration_date_) {
225 // Look for a 2-digit year first. 102 // Look for a 2-digit year first.
226 scanner->Rewind(); 103 scanner->Rewind();
227 pattern = UTF8ToUTF16(kExpirationDate2DigitYearRe); 104 pattern = UTF8ToUTF16(autofill::kExpirationDate2DigitYearRe);
228 if (ParseFieldSpecifics(scanner, pattern, 105 if (ParseFieldSpecifics(scanner, pattern,
229 MATCH_LABEL | MATCH_VALUE | MATCH_TEXT, 106 MATCH_LABEL | MATCH_VALUE | MATCH_TEXT,
230 &credit_card_field->expiration_date_)) { 107 &credit_card_field->expiration_date_)) {
231 credit_card_field->is_two_digit_year_ = true; 108 credit_card_field->is_two_digit_year_ = true;
232 continue; 109 continue;
233 } 110 }
234 111
235 pattern = UTF8ToUTF16(kExpirationDateRe); 112 pattern = UTF8ToUTF16(autofill::kExpirationDateRe);
236 if (ParseFieldSpecifics(scanner, pattern, 113 if (ParseFieldSpecifics(scanner, pattern,
237 MATCH_LABEL | MATCH_VALUE | MATCH_TEXT, 114 MATCH_LABEL | MATCH_VALUE | MATCH_TEXT,
238 &credit_card_field->expiration_date_)) { 115 &credit_card_field->expiration_date_)) {
239 continue; 116 continue;
240 } 117 }
241 } 118 }
242 119
243 if (credit_card_field->expiration_month_ && 120 if (credit_card_field->expiration_month_ &&
244 !credit_card_field->expiration_year_ && 121 !credit_card_field->expiration_year_ &&
245 !credit_card_field->expiration_date_) { 122 !credit_card_field->expiration_date_) {
246 // Parsed a month but couldn't parse a year; give up. 123 // Parsed a month but couldn't parse a year; give up.
247 scanner->RewindTo(saved_cursor); 124 scanner->RewindTo(saved_cursor);
248 return NULL; 125 return NULL;
249 } 126 }
250 } 127 }
251 128
252 // Some pages (e.g. ExpediaBilling.html) have a "card description" 129 // Some pages (e.g. ExpediaBilling.html) have a "card description"
253 // field; we parse this field but ignore it. 130 // field; we parse this field but ignore it.
254 // We also ignore any other fields within a credit card block that 131 // We also ignore any other fields within a credit card block that
255 // start with "card", under the assumption that they are related to 132 // start with "card", under the assumption that they are related to
256 // the credit card section being processed but are uninteresting to us. 133 // the credit card section being processed but are uninteresting to us.
257 if (ParseField(scanner, UTF8ToUTF16(kCardIgnoredRe), NULL)) { 134 if (ParseField(scanner, UTF8ToUTF16(autofill::kCardIgnoredRe), NULL)) {
258 continue; 135 continue;
259 } 136 }
260 137
261 break; 138 break;
262 } 139 }
263 140
264 // Some pages have a billing address field after the cardholder name field. 141 // Some pages have a billing address field after the cardholder name field.
265 // For that case, allow only the cardholder name field. The remaining 142 // For that case, allow only the cardholder name field. The remaining
266 // CC fields will be picked up in a following CreditCardField. 143 // CC fields will be picked up in a following CreditCardField.
267 if (credit_card_field->cardholder_) 144 if (credit_card_field->cardholder_)
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
327 map); 204 map);
328 } else { 205 } else {
329 ok = ok && AddClassification(expiration_year_, 206 ok = ok && AddClassification(expiration_year_,
330 CREDIT_CARD_EXP_4_DIGIT_YEAR, 207 CREDIT_CARD_EXP_4_DIGIT_YEAR,
331 map); 208 map);
332 } 209 }
333 } 210 }
334 211
335 return ok; 212 return ok;
336 } 213 }
OLDNEW
« no previous file with comments | « chrome/browser/autofill/autofill_regex_constants.cc.utf8 ('k') | chrome/browser/autofill/email_field.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698